tripal_analysis_kegg.module 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003
  1. <?php
  /**
   * Implements hook_init().
   *
   * Queues this module's JavaScript and CSS, followed by the jsTree stylesheet
   * and scripts. Every file is looked up below the 'tripal' theme directory.
   */
  function tripal_analysis_kegg_init() {
    $theme_dir = drupal_get_path('theme', 'tripal');
    $jstree_dir = $theme_dir . '/js/jsTree/source';

    // Module-specific script and stylesheet.
    drupal_add_js($theme_dir . '/js/tripal_analysis_kegg.js');
    drupal_add_css($theme_dir . '/css/tripal_analysis_kegg.css');

    // jsTree stylesheet and scripts.
    drupal_add_css($jstree_dir . '/tree_component.css');
    drupal_add_js($jstree_dir . '/_lib.js');
    drupal_add_js($jstree_dir . '/tree_component.js');
  }
  /**
   * Implements hook_node_info().
   *
   * Declares the single node type provided by this module,
   * 'chado_analysis_kegg'.
   *
   * @return
   *   An array keyed by node type name whose value holds that type's settings.
   */
  function tripal_analysis_kegg_node_info() {
    return array(
      'chado_analysis_kegg' => array(
        'name' => t('Analysis: KEGG'),
        'module' => 'chado_analysis_kegg',
        'description' => t('Results from a KEGG/KAAS analysis'),
        'has_title' => FALSE,
        'title_label' => t('Analysis: KEGG'),
        'has_body' => FALSE,
        'body_label' => t('KEGG Analysis Description'),
        'locked' => TRUE,
      ),
    );
  }
  /**
   * Implements hook_menu().
   *
   * Registers two callback paths: 'brite/%', served by
   * tripal_analysis_kegg_brite(), and 'tripal_analysis_kegg_org_report/%',
   * served by tripal_analysis_kegg_org_report().
   *
   * @return
   *   The menu item definitions keyed by path.
   */
  function tripal_analysis_kegg_menu() {
    // BRITE browser / AJAX endpoint.
    $brite_page = array(
      'title' => t('KEGG BRITE'),
      'page callback' => 'tripal_analysis_kegg_brite',
      'page arguments' => array(1, 2),
      'access arguments' => array('access content'),
      'type' => MENU_CALLBACK,
    );

    // Per-organism KEGG report.
    $org_report_page = array(
      'path' => 'tripal_analysis_kegg_org_report',
      'title' => t('Analysis KEGG report'),
      'page callback' => 'tripal_analysis_kegg_org_report',
      'page arguments' => array(1),
      'access arguments' => array('access chado_analysis_kegg content'),
      'type' => MENU_CALLBACK,
    );

    return array(
      'brite/%' => $brite_page,
      'tripal_analysis_kegg_org_report/%' => $org_report_page,
    );
  }
  /**
   * Implements hook_perm().
   *
   * Lists the permissions that protect viewing, creating, deleting and editing
   * of chado_analysis_kegg content.
   *
   * @return
   *   An array of permission name strings.
   */
  function tripal_analysis_kegg_perm() {
    $permissions = array();
    foreach (array('access', 'create', 'delete', 'edit') as $operation) {
      $permissions[] = $operation . ' chado_analysis_kegg content';
    }
    return $permissions;
  }
  /**
   * Access callback for operations on chado_analysis_kegg content.
   *
   * Each operation maps to one permission from tripal_analysis_kegg_perm():
   * create -> 'create', update -> 'edit', delete -> 'delete', view -> 'access'.
   *
   * @param $op
   *   The operation being attempted: 'create', 'update', 'delete' or 'view'.
   * @param $node
   *   The node (or node type) the operation targets; not inspected here.
   * @param $account
   *   The user account whose permissions are checked.
   *
   * @return
   *   For 'create', whatever user_access() returns. For the other known
   *   operations TRUE when permitted. FALSE otherwise, including for unknown
   *   operations.
   */
  function chado_analysis_kegg_access($op, $node, $account) {
    switch ($op) {
      case 'create':
        return user_access('create chado_analysis_kegg content', $account);

      case 'update':
        return user_access('edit chado_analysis_kegg content', $account) ? TRUE : FALSE;

      case 'delete':
        return user_access('delete chado_analysis_kegg content', $account) ? TRUE : FALSE;

      case 'view':
        return user_access('access chado_analysis_kegg content', $account) ? TRUE : FALSE;
    }
    return FALSE;
  }
  /**
   * Page callback for 'brite/%': KEGG BRITE browser for one analysis.
   *
   * Two modes, selected by $ajax:
   * - Not AJAX: returns the HTML of a two-column table. The left column lists
   *   every cvterm attached to the analysis (through analysisprop) whose
   *   definition starts with 'KEGG BRITE term: '. Each entry links to
   *   brite/<analysis_id>/<cvterm_id>/1 and calls the JavaScript function
   *   tripal_update_brite() on click. The right column is a placeholder.
   * - AJAX: looks up the stored hierarchy (analysisprop.value) for the given
   *   term and analysis and sends it with drupal_json() under the keys
   *   'update', 'id' and 'brite_term'. Nothing is returned in this mode.
   *
   * @param $analysis_id
   *   The chado analysis_id whose BRITE data is shown.
   * @param $type_id
   *   The cvterm_id of the BRITE term to expand. Only used in AJAX mode.
   * @param $ajax
   *   Any non-empty value selects AJAX mode. Optional, because the listing
   *   page is requested without it.
   *
   * @return
   *   The HTML string in non-AJAX mode, otherwise NULL.
   */
  function tripal_analysis_kegg_brite($analysis_id, $type_id = NULL, $ajax = FALSE) {

    if ($ajax) {
      $previous_db = tripal_db_set_active('chado');

      // Get BRITE term from cvterm table
      $sql = 'SELECT name FROM {cvterm} WHERE cvterm_id=%d';
      $brite_term = db_result(db_query($sql, $type_id));

      // Get BRITE hierarchy tree
      $sql = "SELECT value
              FROM {analysisprop} AP
              INNER JOIN CVterm CVT on AP.type_id = CVT.cvterm_id
              INNER JOIN CV on CVT.cv_id = CV.cv_id
              WHERE CV.name = 'tripal' and CVT.name = '%s'
              AND AP.analysis_id = %d";
      $result = db_fetch_object(db_query($sql, $brite_term, $analysis_id));
      tripal_db_set_active($previous_db);

      // No stored hierarchy for this term: send an empty tree container
      // rather than dereferencing a missing row.
      $tree = $result ? $result->value : '';
      $content = "<div class=\"tripal_kegg_brite_tree\" id=\"tripal_kegg_brite_tree_$type_id\">$tree</div>";

      drupal_json(array('update' => $content,
                        'id' => "tripal_kegg_brite_tree_$type_id",
                        'brite_term' => "Hierarchy: $brite_term"));
      return;
    }

    $content = "<div id=\"tripal_kegg_brite_results\" class=\"tripal_kegg_brite-info-box\">
      <table>
      <tr>
      <th>KEGG BRITE</th>
      <th id=\"tripal_kegg_brite_header\">Hierarchy:</th>
      </tr>
      <tr>
      <td nowrap valign=\"top\">
      ";

    // List all BRITE terms on the left
    $sql = "SELECT DISTINCT CVT.name, CVT.cvterm_id
            FROM {cvterm} CVT
            INNER JOIN analysisprop AP ON CVT.cvterm_id = AP.type_id
            WHERE AP.analysis_id = %d AND CVT.definition LIKE 'KEGG BRITE term: %'
            ORDER BY CVT.cvterm_id";
    $previous_db = tripal_db_set_active('chado');
    $result = db_query($sql, $analysis_id);
    tripal_db_set_active($previous_db);
    while ($term = db_fetch_object($result)) {
      $url = url("brite/$analysis_id/$term->cvterm_id/1");
      $content .= "<li class=\"tripal_kegg_brite_terms\"><a onclick=\"return tripal_update_brite(".
        "this,$term->cvterm_id)\" href=\"$url\">
        $term->name
        </a></li>";
    }

    // Placeholder for the hierarchy tree, filled in by the AJAX request
    $content .= "</td>
      <td nowrap id=\"tripal_kegg_brite_hierarchy\" valign=\"top\">";
    $content .= "<i>Note:</i> Click a BRITE term for its functional hierarchy";
    $content .= " </td>
      </tr>
      </table>
      </div>";

    return $content;
  }
  /**
   * Builds the node form for a KEGG analysis.
   *
   * Starts from the generic analysis form returned by chado_analysis_form()
   * and adds a 'kegg' fieldset with the KEGG-specific settings plus a checkbox
   * that requests a parsing job.
   *
   * @param $node
   *   The node being added or edited. Saved settings are read from
   *   $node->analysis->tripal_analysis_kegg, which chado_analysis_kegg_load()
   *   attaches.
   *
   * @return
   *   The form array.
   */
  function chado_analysis_kegg_form ($node){
    // add in the default fields
    $form = chado_analysis_form($node);

    // set the defaults; the settings object is absent when the node has not
    // been through chado_analysis_kegg_load(), so every read is guarded
    $kegg = NULL;
    if (isset($node->analysis) && isset($node->analysis->tripal_analysis_kegg)) {
      $kegg = $node->analysis->tripal_analysis_kegg;
    }
    $query_re         = isset($kegg->query_re) ? $kegg->query_re : NULL;
    $query_type       = isset($kegg->query_type) ? $kegg->query_type : NULL;
    $query_uniquename = isset($kegg->query_uniquename) ? $kegg->query_uniquename : NULL;
    $hierfile         = isset($kegg->hierfile) ? $kegg->hierfile : NULL;

    $form['kegg'] = array(
      '#title' => t('KEGG Settings'),
      '#type' => 'fieldset',
      '#description' => t('Specific Settings for KEGG Analysis.'),
      '#collapsible' => TRUE,
      '#attributes' => array('id' => 'kegg-extra-settings'),
      '#weight' => 11
    );
    $form['kegg']['hierfile'] = array(
      '#title' => t('KAAS hier.tar.gz Output File'),
      '#type' => 'textfield',
      '#description' => t('The full path to the hier.tar.gz file generated by KAAS.
        Alternatively, you can input the full path to the directory
        that contains decompressed kegg files.'),
      '#default_value' => $hierfile,
    );
    $form['kegg']['query_re'] = array(
      '#title' => t('Query Name RE'),
      '#type' => 'textfield',
      '#description' => t('Enter the regular expression that will extract the '.
        'feature name from the results line in the KEGG hier results. This will be '.
        'the same as the definition line in the query FASTA file used for the analysis. This option '.
        'is only required when the query does not identically match a feature '.
        'in the database.'),
      '#default_value' => $query_re,
    );
    $form['kegg']['query_type'] = array(
      '#title' => t('Query Type'),
      '#type' => 'textfield',
      '#description' => t('Please enter the Sequence Ontology term that describes '.
        'the query sequences used for KEGG. This is only necessary if two '.
        'or more sequences have the same name.'),
      '#default_value' => $query_type,
    );
    $form['kegg']['query_uniquename'] = array(
      '#title' => t('Use Unique Name'),
      '#type' => 'checkbox',
      '#description' => t('Select this checkbox if the feature name in the KEGG hier file '.
        'matches the uniquename in the database. By default, the feature will '.
        'be mapped to the "name" of the feature.'),
      '#default_value' => $query_uniquename,
    );
    $form['kegg']['keggjob'] = array(
      '#type' => 'checkbox',
      '#title' => t('Submit a job to parse the kegg output into analysisfeatureprop table'),
      '#description' => t('Note: features associated with the KAAS results must '.
        'exist in chado before parsing the file. Otherwise, KEGG '.
        'results that cannot be linked to a feature will be '.
        'discarded.'),
    );
    return $form;
  }
  /**
   * Saves a new KEGG analysis.
   *
   * Delegates the generic part to chado_analysis_insert(), records the
   * analysis type and each KEGG setting as its own analysis property, and
   * finally hands the node to chado_analysis_kegg_submit_job().
   *
   * @param $node
   *   The node being inserted, carrying the submitted form values.
   */
  function chado_analysis_kegg_insert($node) {
    // insert the analysis
    chado_analysis_insert($node);

    // set the type for this analysis
    tripal_analysis_insert_property($node->analysis_id, 'analysis_type', 'tripal_analysis_kegg');

    // every KEGG setting is stored as a separate property:
    // property name => node field holding the value
    $settings = array(
      'analysis_kegg_settings' => 'hierfile',
      'analysis_kegg_query_re' => 'query_re',
      'analysis_kegg_query_type' => 'query_type',
      'analysis_kegg_query_uniquename' => 'query_uniquename',
    );
    foreach ($settings as $property => $field) {
      tripal_analysis_insert_property($node->analysis_id, $property, $node->$field);
    }

    // Add a job if the user wants to parse the KAAS output
    chado_analysis_kegg_submit_job($node);
  }
  /**
   * Schedules a Tripal job that parses the KAAS output of an analysis.
   *
   * Nothing happens unless the 'keggjob' checkbox was ticked. When the
   * configured path is readable, a job running
   * tripal_analysis_kegg_parseHierFile() is queued for the current user.
   * Otherwise a message is shown and no job is added.
   *
   * @param $node
   *   The analysis node carrying analysis_id, hierfile, query_re, query_type,
   *   query_uniquename and keggjob.
   */
  function chado_analysis_kegg_submit_job($node) {
    global $user;

    if (!$node->keggjob) {
      return;
    }

    // positional arguments handed to tripal_analysis_kegg_parseHierFile()
    $job_args = array(
      $node->analysis_id,
      $node->hierfile,
      base_path(),
      $node->query_re,
      $node->query_type,
      $node->query_uniquename,
    );

    if (!is_readable($node->hierfile)) {
      drupal_set_message("Can not open KAAS hier.tar.gz output file. Job not scheduled.");
      return;
    }

    // keep only what follows the last '/' for the job title
    $fname = preg_replace("/.*\/(.*)/", "$1", $node->hierfile);
    tripal_add_job("Parse KAAS output: $fname", 'tripal_analysis_kegg',
      'tripal_analysis_kegg_parseHierFile', $job_args, $user->uid);
  }
  /**
   * Deletes a KEGG analysis.
   *
   * All of the work is delegated to chado_analysis_delete().
   *
   * @param $node
   *   The analysis node being deleted.
   */
  function chado_analysis_kegg_delete($node) {
    chado_analysis_delete($node);
  }
  /**
   * Saves changes to an existing KEGG analysis.
   *
   * Delegates the generic part to chado_analysis_update(), rewrites the
   * analysis type and each KEGG setting as its own analysis property, and
   * finally hands the node to chado_analysis_kegg_submit_job().
   *
   * @param $node
   *   The node being updated, carrying the submitted form values.
   */
  function chado_analysis_kegg_update($node) {
    // update the analysis
    chado_analysis_update($node);

    // set the type for this analysis
    tripal_analysis_update_property($node->analysis_id, 'analysis_type', 'tripal_analysis_kegg', 1);

    // every KEGG setting is stored as a separate property:
    // property name => node field holding the value
    $settings = array(
      'analysis_kegg_settings' => 'hierfile',
      'analysis_kegg_query_re' => 'query_re',
      'analysis_kegg_query_type' => 'query_type',
      'analysis_kegg_query_uniquename' => 'query_uniquename',
    );
    foreach ($settings as $property => $field) {
      tripal_analysis_update_property($node->analysis_id, $property, $node->$field, 1);
    }

    // Add a job if the user wants to parse the KAAS output
    chado_analysis_kegg_submit_job($node);
  }
  /**
   * Adds the KEGG settings to an analysis node while it is being loaded.
   *
   * Starts from the fields returned by chado_analysis_load() and attaches a
   * 'tripal_analysis_kegg' object to the analysis. The object carries the
   * stored hierfile, query_re, query_type and query_uniquename property
   * values.
   *
   * @param $node
   *   The node being loaded.
   *
   * @return
   *   The additions object from chado_analysis_load(), with
   *   ->analysis->tripal_analysis_kegg filled in. A setting that has no stored
   *   property is NULL.
   */
  function chado_analysis_kegg_load($node){
    // load the default set of analysis fields
    $additions = chado_analysis_load($node);

    // create some variables for easier lookup
    $analysis = $additions->analysis;
    $analysis_id = $analysis->analysis_id;

    // The settings container must exist before properties are assigned to it;
    // assigning through a missing object is a fatal error on current PHP.
    if (!isset($analysis->tripal_analysis_kegg) || !is_object($analysis->tripal_analysis_kegg)) {
      $analysis->tripal_analysis_kegg = new stdClass();
    }

    // settings field => analysis property that stores it
    $settings = array(
      'hierfile' => 'analysis_kegg_settings',
      'query_re' => 'analysis_kegg_query_re',
      'query_type' => 'analysis_kegg_query_type',
      'query_uniquename' => 'analysis_kegg_query_uniquename',
    );
    foreach ($settings as $field => $property) {
      $stored = tripal_analysis_get_property($analysis_id, $property);
      // a property that was never saved yields no object to read from
      $analysis->tripal_analysis_kegg->$field = is_object($stored) ? $stored->value : NULL;
    }

    return $additions;
  }
  /**
   * Prepares a KEGG analysis node for display.
   *
   * Teasers are returned untouched. For a full view the node goes through
   * node_prepare(), and a 'timeexecuted' value that arrives as an array with
   * 'year', 'month' and 'day' keys (as happens on form preview) is flattened
   * to a 'year-month-day' string.
   *
   * @param $node
   *   The node to display.
   * @param $teaser
   *   TRUE when a teaser is being built.
   * @param $page
   *   TRUE when the node is shown on its own page; not used here.
   *
   * @return
   *   The prepared node.
   */
  function chado_analysis_kegg_view ($node, $teaser = FALSE, $page = FALSE) {
    if ($teaser) {
      return $node;
    }

    // use drupal's default node view
    $node = node_prepare($node, $teaser);

    // a previewed form submits the date as an array, which would otherwise be
    // rendered as the word 'Array'
    $executed = $node->timeexecuted;
    if (is_array($executed)) {
      $node->timeexecuted = $executed['year'] . '-' . $executed['month'] . '-' . $executed['day'];
    }
    return $node;
  }
  /**
   * Job callback: loads the KAAS 'hier' output of an analysis into chado.
   *
   * When $hierfile is a regular file it is unpacked with tar below the
   * module's directory in the Drupal files folder, and the resulting 'hier'
   * directory is parsed and removed again afterwards. Otherwise $hierfile is
   * used as the directory to parse. All analysisfeature records of the
   * analysis are deleted first. Then every file whose name matches '.keg' is
   * handed to tripal_analysis_kegg_parse_kegg_file(), and the HTML it returns
   * is stored as an analysisprop of the BRITE term that file defines.
   *
   * Progress and diagnostics are printed. The script terminates (die/exit)
   * when the directory cannot be opened or the old records cannot be removed.
   *
   * @param $analysis_id
   *   The chado analysis_id the results belong to.
   * @param $hierfile
   *   Path to the hier.tar.gz archive, or to a directory of extracted files.
   * @param $base_path
   *   Passed through to tripal_analysis_kegg_parse_kegg_file().
   * @param $query_re
   *   Regular expression used to extract the feature name from a result line.
   * @param $query_type
   *   Sequence Ontology term used to narrow the feature lookup.
   * @param $query_uniquename
   *   When set, features are matched on uniquename instead of name.
   * @param $job_id
   *   The Tripal job whose progress is updated.
   */
  function tripal_analysis_kegg_parseHierFile ($analysis_id, $hierfile, $base_path,
    $query_re,$query_type,$query_uniquename,$job_id) {

    // If user input a file (e.g. hier.tar.gz), decompress it first
    $is_archive = is_file($hierfile);
    if ($is_archive) {
      $data_dir = file_directory_path() . "/tripal/tripal_analysis_kegg";
      // both paths are escaped so spaces or shell metacharacters in them
      // cannot alter the command
      $output = shell_exec("cd " . escapeshellarg($data_dir) . "; tar -zxf " . escapeshellarg($hierfile) . ";");
      print "$output\n";
      $hierdir = $data_dir."/hier";
    }
    // Otherwise, treat it as a directory
    else {
      $hierdir = $hierfile;
    }

    $dir_handle = @opendir($hierdir) or die("Unable to open $hierdir");
    $listed = glob($hierdir . '/*.*');
    $total_files = is_array($listed) ? count($listed) : 0;
    print "There are $total_files keg file(s).\n";

    // Report progress about every 1% of the files. With fewer than 100 files
    // 1% rounds down to zero, which must not be used as a modulus.
    $interval = max(1, intval($total_files * 0.01));
    $no_file = 0;

    // Remove the analysis features for this analysis
    // we will rebuild them from just this parsing
    if(!tripal_core_chado_delete('analysisfeature',array('analysis_id' => $analysis_id))){
      print "ERROR: Cannot prepare the analysis for adding features\n";
      exit;
    }

    // compare against FALSE so a file literally named "0" does not end the loop
    while (($file = readdir($dir_handle)) !== FALSE) {
      if (!preg_match("/^.*\.keg/", $file)) {
        continue;
      }

      // Update the progress
      if ($no_file % $interval == 0) {
        $percentage = $total_files > 0 ? (int) ($no_file / $total_files * 100) : 0;
        tripal_job_set_progress($job_id, $percentage);
        print $percentage."% ";
      }
      $no_file ++;

      # $type variable will be set in tripal_analysis_kegg_parse_kegg_file()
      $content = tripal_analysis_kegg_parse_kegg_file("$hierdir/$file",$type,
        $analysis_id, $base_path, $query_re,$query_type,$query_uniquename);
      if (!$content) {
        continue;
      }

      // Stored values have single quotes encoded as an HTML entity. Encode
      // before comparing so unchanged content is recognised as such.
      $content = str_replace("'", "&#39;", $content);

      $previous_db = tripal_db_set_active('chado'); // Use chado database

      // Make sure the same value doesn't exist before inserting into chado
      $sql = "SELECT value
              FROM {analysisprop}
              WHERE analysis_id = %d
              AND type_id = (SELECT cvterm_id
                             FROM {cvterm} CVT
                             INNER JOIN CV ON CVT.cv_id = CV.cv_id
                             WHERE CV.name = 'tripal' AND CVT.name = '%s'
                            )
             ";
      $oldvalue = db_result(db_query($sql, $analysis_id, $type));

      if ($oldvalue != $content) {
        // Get type_id for the BRITE term
        $sql = "SELECT cvterm_id
                FROM {cvterm} CVT
                INNER JOIN CV ON CVT.cv_id = CV.cv_id
                WHERE CV.name = 'tripal' AND CVT.name = '%s'";
        $brite_cvterm_id = db_result(db_query($sql, $type));

        // The content goes through a placeholder: it is parsed file data and
        // may contain '%' sequences or other characters that must not be
        // interpreted as part of the statement.
        $sql = "INSERT INTO {analysisprop} (analysis_id, type_id, value)
                VALUES (%d, %d, '%s')";
        db_query($sql, $analysis_id, $brite_cvterm_id, $content);
      }

      tripal_db_set_active($previous_db); // Use drupal database
    }
    print "Done.\n";
    closedir($dir_handle);

    // If user input a file, remove decompressed files after parsing
    if ($is_archive) {
      $output = shell_exec("rm -r " . escapeshellarg($hierdir) . ";");
      print "$output\n";
    }
    return;
  }
  /**
   * Parses one *.keg file and loads the features it mentions into chado.
   *
   * The definition line of the file names the BRITE term. It is returned
   * through $type and added as a cvterm of the 'tripal' cv when missing.
   * Each following line starts with a letter (A-P) giving its depth in the
   * hierarchy. A line containing '; <a' is treated as a result line: its
   * feature is looked up in chado, an analysisfeature row is ensured for it,
   * and the KEGG link is stored as an analysisfeatureprop of type
   * 'kegg_brite_data'. Result lines are collected as <li> items inside nested
   * <ul> blocks.
   *
   * @param $file
   *   Path of the .keg file.
   * @param $type
   *   Output: set to the BRITE term found in the definition line. Left
   *   untouched when the file has no such line.
   * @param $analysis_id
   *   The chado analysis_id the results belong to.
   * @param $base_path
   *   Not used by this function.
   * @param $query_re
   *   Optional regular expression (without delimiters) whose first group
   *   captures the feature name in a result line.
   * @param $query_type
   *   Optional Sequence Ontology term used to narrow the feature lookup.
   * @param $query_uniquename
   *   When set, features are matched on uniquename instead of name.
   *
   * @return
   *   The generated HTML, or an empty string when the file cannot be read.
   */
  function tripal_analysis_kegg_parse_kegg_file ($file,&$type,$analysis_id, $base_path, $query_re,$query_type,$query_uniquename){
    $content = '';

    // An unreadable file yields no content, so the caller stores nothing.
    if (!is_readable($file)) {
      return $content;
    }
    $handle = fopen($file,'r');
    if (!$handle) {
      return $content;
    }

    $current = '@'; # this is one character below 'A' in the ASCII table
    $prev = '@';
    $id = 0;
    $type_id = 0;
    $ul_content = array();
    $has_feature = 0;
    $level = -1;
    // cvterm_id of 'kegg_brite_data'; looked up once, on the first result line
    $brite_data_type_id = NULL;

    // Finds a term of the 'tripal' cv by name
    $cvterm_sql = "SELECT cvterm_id
                   FROM {cvterm} CVT
                   INNER JOIN cv ON cv.cv_id = CVT.cv_id
                   WHERE cv.name = 'tripal'
                   AND CVT.name = '%s'";
    $analysisfeature_sql = "SELECT analysisfeature_id FROM {analysisfeature} WHERE feature_id = %d AND analysis_id = %d";

    while (($line = fgets($handle)) !== FALSE) {

      // Find out what kind of file we're looking at.
      if(preg_match("/#.*nbsp;\s(.*)<\/h2>$/",$line,$matches)){
        // Set type as the matched term in the DEFINITION line and add it as a new cvterm
        $type = $matches[1];

        // Before inserting, make sure this cvterm doesn't exist
        $previous_db = tripal_db_set_active('chado');
        $type_id = db_result(db_query($cvterm_sql, $type));
        tripal_db_set_active($previous_db);
        if (!$type_id) {
          tripal_add_cvterms($type, "KEGG BRITE term: $type");
          // Get newly added type_id
          $previous_db = tripal_db_set_active('chado');
          $type_id = db_result(db_query($cvterm_sql, $type));
          tripal_db_set_active($previous_db);
        }
      }

      // get the depth of the hierarchy (e.g. A,B,C or D);
      // skip lines that aren't data or are empty
      if (!preg_match("/^([ABCDEFGHIJKLMNOP])\s*(.*)/",$line,$matches) or !$matches[2]) {
        continue;
      }

      // set the current level and keep track of the previous level (e.g. A,B,C...)
      $prev = $current;
      $current = $matches[1];
      $delta = ord($current) - ord($prev);

      // if we have matches and we are moving back towards the root then we
      // want to close off each <ul> until we hit the level we are currently at
      // and at the same time we want to keep the text. If we don't have a
      // match then we'll throw out the text.
      if($has_feature){
        for($i = $delta; $i < 0; $i++){
          if (!isset($ul_content[$level])) {
            $ul_content[$level] = '';
          }
          $ul_content[$level] .= "</ul>\n";
          $content .= $ul_content[$level];
          $ul_content[$level--] = '';
        }
        $has_feature = 0;
      } else {
        for($i = $delta; $i < 0; $i++){
          $ul_content[$level--] = '';
        }
      }

      // if we've moved deeper into the hierarchy then add a new <ul> block
      for($i = 0; $i < $delta; $i++){
        $level++;
        if (!isset($ul_content[$level])) {
          $ul_content[$level] = '';
        }
        $ul_content[$level] .= "<ul>\n";
      }

      // change all relative paths to absolute paths pointing to KEGG (www.genome.jp)
      $matches[2] = preg_replace("/<a href=\"\//i","<a href=\"http://www.genome.jp/",$matches[2]);
      // add id to <a> tags so we can link kegg.gif to it in tripal_analysis_kegg.css
      $matches[2] = preg_replace("/<a href=\"/i","<a id=\"tripal_kegg_brite_links\" target=\"_blank\" href=\"",$matches[2]);

      // extract the features that have been mapped to the KEGG IDs
      if(preg_match("/^(.*?);\s*(\<a.+)/",$matches[2],$keggline)){
        $has_feature = 1;

        // Find cvterm_id for 'kegg_brite_data'
        if ($brite_data_type_id === NULL) {
          $previous_db = tripal_db_set_active('chado');
          $brite_data_type_id = db_result(db_query($cvterm_sql, 'kegg_brite_data'));
          tripal_db_set_active($previous_db);
        }

        // get the feature name using the user's regular expression
        if ($query_re and preg_match("/$query_re/", $keggline[1], $parts)) {
          $feature = $parts[1];
        }
        // If not in above format then pull up to the first space
        else {
          if (preg_match('/^(.*?)\s.*$/', $keggline[1], $parts)) {
            $feature = $parts[1];
          }
          // if no match up to the first space then just use the entire string
          else {
            $feature = $keggline[1];
          }
        }

        // now find the feature in chado
        $select = array();
        if($query_uniquename){
          $select['uniquename'] = $feature;
        } else {
          $select['name'] = $feature;
        }
        if($query_type){
          $select['type_id'] = array(
            'cv_id' => array(
              'name' => 'sequence'
            ),
            'name' => $query_type,
          );
        }
        $feature_arr = tripal_core_chado_select('feature',array('feature_id'),$select);
        $found = is_array($feature_arr) ? count($feature_arr) : 0;
        if($found > 1){
          print "Ambiguous: '$feature' matches more than one feature and is being skipped.\n";
          continue;
        }
        if($found == 0){
          print "Failed: '$feature' cannot find a matching feature in the database.\n";
          continue;
        }

        // resolve the id first so the message reports the feature being added
        $feature_id = $feature_arr[0]->feature_id;
        print "Adding KEGG results for $feature ($feature_id,$analysis_id): $type\n";

        if($feature_id){
          // get the node ID of the feature if one exists
          $sql = "SELECT nid FROM {chado_feature} WHERE feature_id = %d";
          $nid = db_result(db_query($sql, $feature_id));

          //------------------------------------------------------
          // Insert into analysisfeature table
          //------------------------------------------------------
          $previous_db = tripal_db_set_active('chado');

          // Get the highest rank for this feature_id in analysisfeatureprop table
          $sql = "SELECT MAX(rank) FROM {analysisfeatureprop} AFP ".
                 "INNER JOIN analysisfeature AF ON AF.analysisfeature_id = AFP.analysisfeature_id ".
                 "WHERE feature_id=%d ".
                 "AND analysis_id=%d ".
                 "AND type_id=%d ";
          $afp = db_fetch_object(db_query($sql, $feature_id, $analysis_id, $brite_data_type_id));
          $hi_rank = 0;
          if ($afp) {
            $hi_rank = $afp->max + 1;
          }

          // A feature can occur on several lines of the same analysis; only
          // create the analysisfeature row when it is not there yet.
          $analysisfeature_id = db_result(db_query($analysisfeature_sql, $feature_id, $analysis_id));
          if (!$analysisfeature_id) {
            $sql = "INSERT INTO {analysisfeature} (feature_id, analysis_id) ".
                   "VALUES (%d, %d)";
            db_query ($sql, $feature_id, $analysis_id);
            // Get the newly inserted analysisfeature_id
            $analysisfeature_id = db_result(db_query($analysisfeature_sql, $feature_id, $analysis_id));
          }

          // Insert into analysisfeatureprop table
          $sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank)".
                 "VALUES (%d, %d, '%s', %d)";
          db_query($sql, $analysisfeature_id, $brite_data_type_id, $keggline[2], $hi_rank);

          tripal_db_set_active($previous_db);

          // Add link to each matched feature. The path is given without a
          // leading slash because url() prepends the site base path itself.
          if($nid){
            $matches[2] = preg_replace("/^(.*?)(;\s*\<a)/","<a id=\"tripal_kegg_feature_links\" target=\"_blank\" href=\"".url("node/$nid")."\">"."$1"."</a>"."$2",$matches[2]);
          }
        }

        // we only want to store results with features
        if (!isset($ul_content[$level])) {
          $ul_content[$level] = '';
        }
        $ul_content[$level] .= "<li id=\"term_$id\">$matches[2]</li>\n";
      }
      $id++;
    }
    // NOTE(review): list blocks still pending in $ul_content are not appended
    // here; kept as found - confirm this is intended.
    $content .= "</ul>";
    fclose($handle);
    return $content;
  }
  /**
   * Implements hook_nodeapi().
   *
   * Acts on the 'view' operation only. Depending on the node's build mode it
   * adds one themed element to $node->content:
   * - search indexing: 'tripal_analysis_kegg_search_index'
   * - search result:   'tripal_analysis_kegg_search_result'
   * - any other mode:  'tripal_organism_kegg_summary', and only for nodes of
   *   type 'chado_organism'.
   *
   * @param $node
   *   The node being processed; modified in place.
   * @param $op
   *   The operation being performed.
   * @param $teaser
   *   Not used here.
   * @param $page
   *   Not used here.
   */
  function tripal_analysis_kegg_nodeapi(&$node, $op, $teaser, $page) {
    if ($op != 'view') {
      return;
    }

    // content for the search index
    if ($node->build_mode == NODE_BUILD_SEARCH_INDEX) {
      $node->content['tripal_analysis_kegg_search_index'] = array(
        '#value' => theme('tripal_analysis_kegg_search_index', $node),
        '#weight' => 6,
      );
      return;
    }

    // content for a search result
    if ($node->build_mode == NODE_BUILD_SEARCH_RESULT) {
      $node->content['tripal_analysis_kegg_search_result'] = array(
        '#value' => theme('tripal_analysis_kegg_search_result', $node),
        '#weight' => 6,
      );
      return;
    }

    // regular display: organism pages get the KEGG summary
    if ($node->type == 'chado_organism') {
      $node->content['tripal_organism_kegg_summary'] = array(
        '#value' => theme('tripal_organism_kegg_summary', $node),
        '#weight' => 6
      );
    }
  }
  /**
   * Implements hook_theme().
   *
   * Registers the theme hooks of this module so that sites can override the
   * generated output. The two search hooks are theme functions taking the
   * node. The summary and report hooks are rendered from template files named
   * after the hook.
   *
   * @return
   *   The theme registry entries keyed by hook name.
   */
  function tripal_analysis_kegg_theme () {
    $hooks = array();

    // rendered by theme functions
    foreach (array('tripal_analysis_kegg_search_index', 'tripal_analysis_kegg_search_result') as $hook) {
      $hooks[$hook] = array(
        'arguments' => array('node'),
      );
    }

    // rendered by templates that share the hook's name
    foreach (array('tripal_organism_kegg_summary', 'tripal_analysis_kegg_report') as $hook) {
      $hooks[$hook] = array(
        'arguments' => array('node' => null),
        'template' => $hook,
      );
    }

    return $hooks;
  }
  /**
   * Preprocess function that attaches the full KEGG report to an analysis.
   *
   * Acts only when the first suggested template file is
   * 'node-chado_analysis_kegg'. In that case the result of
   * tripal_analysis_kegg_full_report() is stored at
   * $variables['node']->analysis->tripal_analysis_kegg->kegg_report.
   *
   * @param $variables
   *   The theme variables, passed by reference.
   *
   * @ingroup tripal_analysis_kegg
   */
  function tripal_analysis_kegg_preprocess(&$variables){
    // The comparison below reads 'template_files', which is only guarded
    // because it may be absent from $variables.
    if (!isset($variables['template_files'][0])
        or $variables['template_files'][0] != 'node-chado_analysis_kegg') {
      return;
    }

    // nothing to attach the report to
    if (!isset($variables['node']->analysis) or !is_object($variables['node']->analysis)) {
      return;
    }
    $analysis = $variables['node']->analysis;

    // make sure the settings container exists before writing into it
    if (!isset($analysis->tripal_analysis_kegg) or !is_object($analysis->tripal_analysis_kegg)) {
      $analysis->tripal_analysis_kegg = new stdClass();
    }
    $analysis->tripal_analysis_kegg->kegg_report = tripal_analysis_kegg_full_report($analysis->analysis_id);
  }
  /**
   * Preprocess function for the tripal_organism_kegg_summary template.
   *
   * Stores the result of tripal_analysis_kegg_load_organism_kegg_summary()
   * for the current node at $variables['tripal_analysis_kegg']['select_form'].
   *
   * @param $variables
   *   The theme variables, passed by reference; 'node' is read from it.
   */
  function tripal_analysis_kegg_preprocess_tripal_organism_kegg_summary(&$variables){
    $select_form = tripal_analysis_kegg_load_organism_kegg_summary($variables['node']);
    $variables['tripal_analysis_kegg']['select_form'] = $select_form;
  }
  667. /************************************************************************
  668. */
  /**
   * Builds the plain text added to the search index for a feature node.
   *
   * For every analysis that has a 'kegg_brite_data' property attached to the
   * feature, the analysis name is emitted followed by each property value
   * stored for that analysis/feature pair.
   *
   * @param $node
   *   The node being indexed. Only 'chado_feature' nodes produce text; the
   *   feature id is read from $node->feature->feature_id.
   *
   * @return
   *   The text to index, or an empty string when the node is not a feature or
   *   has no KEGG results.
   */
  function theme_tripal_analysis_kegg_search_index($node){
    if ($node->type != 'chado_feature') {
      return '';
    }
    $feature_id = $node->feature->feature_id;
    $content = '';

    $previous_db = tripal_db_set_active('chado');

    // Find cvterm_id for 'kegg_brite_data'
    $sql = "SELECT cvterm_id
            FROM {cvterm} CVT
              INNER JOIN cv ON cv.cv_id = CVT.cv_id
            WHERE cv.name = 'tripal'
              AND CVT.name = '%s'";
    $brite_data_type_id = db_result(db_query($sql, 'kegg_brite_data'));

    // Every analysis holding KEGG data for this feature. The result set is
    // walked directly, so the query only has to run once.
    $sql = "SELECT analysis_id AS aid
            FROM {analysisfeature} AF
              INNER JOIN analysisfeatureprop AFP ON AF.analysisfeature_id = AFP.analysisfeature_id
            WHERE feature_id = %d
              AND AFP.type_id = %d
            GROUP BY analysis_id";
    $analyses = db_query($sql, $feature_id, $brite_data_type_id);

    $name_sql = "SELECT name
                 FROM {analysis}
                 WHERE analysis_id = %d";
    $values_sql = "SELECT AFP.value AS afpvalue
                   FROM {analysisfeatureprop} AFP
                     INNER JOIN analysisfeature AF on AF.analysisfeature_id = AFP.analysisfeature_id
                   WHERE AF.analysis_id = %d
                     AND AF.feature_id = %d";

    while ($ana = db_fetch_object($analyses)) {
      // Separate consecutive analyses so the last value of one does not fuse
      // with the name of the next into a single search token.
      if ($content != '') {
        $content .= ' ';
      }
      $ana_details = db_fetch_object(db_query($name_sql, $ana->aid));
      if ($ana_details) {
        $content .= $ana_details->name;
      }

      $kegg_results = db_query($values_sql, $ana->aid, $feature_id);
      while ($afp = db_fetch_object($kegg_results)) {
        $content .= " $afp->afpvalue";
      }
    }

    tripal_db_set_active($previous_db);
    return $content;
  }
  723. /************************************************************************
  724. */
  /**
   * Produces the search-result markup for a node.
   *
   * Simply forwards to theme_tripal_analysis_kegg_node_add().
   * NOTE(review): that function is not defined in the part of the file
   * reviewed here -- confirm it still exists.
   *
   * @param $node
   *   The node to render.
   *
   * @return
   *   Whatever theme_tripal_analysis_kegg_node_add() returns for the node.
   */
  function theme_tripal_analysis_kegg_search_result($node){
    return theme_tripal_analysis_kegg_node_add($node);
  }
  729. /************************************************************************
  730. */
  /**
   * Collects what the organism KEGG summary template needs.
   *
   * Looks in kegg_by_organism for at least one row belonging to the node's
   * organism and builds the analysis selection form.
   *
   * @param $node
   *   An organism node; the id is read from $node->organism->organism_id.
   *
   * @return
   *   An array with two entries: 'has_results' (1 when a row was found,
   *   0 otherwise) and 'form' (the output of drupal_get_form() for
   *   tripal_analysis_kegg_select_form).
   */
  function tripal_analysis_kegg_load_organism_kegg_summary($node) {
    $organism_id = $node->organism->organism_id;

    // Only the first row is fetched: its presence is all that matters here.
    $query = "
      SELECT *
      FROM {kegg_by_organism} KBO
      WHERE organism_id = %d
      ORDER BY analysis_id DESC
    ";
    $original_db = tripal_db_set_active('chado');
    $first_row = db_fetch_object(db_query($query, $organism_id));
    tripal_db_set_active($original_db);

    $summary = array();
    $summary['has_results'] = $first_row ? 1 : 0;
    $summary['form'] = drupal_get_form('tripal_analysis_kegg_select_form', $node);
    return $summary;
  }
  752. /************************************************************************
  753. // Show Kegg additional information on a KEGG Analysis page
  754. if ($node->type == 'chado_analysis_kegg') {
  755. return tripal_analysis_kegg_full_report($node->analysis_id);
  756. }
  757. // Show Kegg-info-box on a Feature page
  758. else if ($node->type == 'chado_feature') {
  759. return tripal_analysis_kegg_feature_add($node);
  760. }
  761. return $content;
  762. }
  763. */
  /**
   * Hands the full KEGG report for an analysis to drupal_json().
   *
   * The report markup is wrapped in a one-element array before being passed
   * on.
   *
   * @param $analysis_id
   *   The analysis whose report is requested.
   *
   * @return
   *   The return value of drupal_json().
   */
  function tripal_analysis_kegg_org_report($analysis_id){
    $payload = array(tripal_analysis_kegg_full_report($analysis_id));
    return drupal_json($payload);
  }
  769. /************************************************************************
  770. */
  /**
   * Generates the KEGG report for an analysis.
   *
   * The report is only built when the analysis has at least one property
   * whose cvterm definition starts with 'KEGG BRITE term: ', i.e. when the
   * BRITE data have been parsed into the database. Otherwise a note asking
   * for the parsing job to be run is returned.
   *
   * @param $analysis_id
   *   The analysis to report on.
   *
   * @return
   *   The output of tripal_analysis_kegg_brite(), or the "not available"
   *   note.
   */
  function tripal_analysis_kegg_full_report($analysis_id){
    // Test if brite data have been parsed into the database. The literal
    // percent sign of the LIKE pattern is doubled because db_query() treats
    // '%' as the start of a placeholder.
    $sql = "SELECT CVT.name, CVT.cvterm_id
            FROM {cvterm} CVT
              INNER JOIN analysisprop AP ON CVT.cvterm_id = AP.type_id
            WHERE AP.analysis_id = %d
              AND CVT.definition LIKE 'KEGG BRITE term: %%'
            ORDER BY CVT.cvterm_id";
    $previous_db = tripal_db_set_active('chado');
    $has_brite_terms = db_result(db_query($sql, $analysis_id));
    tripal_db_set_active($previous_db);

    if (!$has_brite_terms) {
      return "<i>Note:</i> Analysis result is not available. Please schedule and run the job to parse the kegg output.";
    }

    // No specific term is selected for the full report. This used to be an
    // undefined variable, which evaluates to NULL as well.
    // NOTE(review): confirm NULL is what tripal_analysis_kegg_brite() expects.
    $type_id = NULL;
    return tripal_analysis_kegg_brite($analysis_id, $type_id, 0);
  }
  789. /*******************************************************************************
  790. * Tripal Kegg administrative setting form. This function is called by
  791. * tripal_analysis module which asks for an admin form to show on the page
  792. */
  /**
   * Builds the KEGG section of the Tripal analysis administration settings.
   *
   * @return
   *   An object with two properties: 'form', a form array holding a
   *   description item and the 'tripal_analysis_kegg_setting' checkboxes, and
   *   'title', the heading for the section.
   */
  function tripal_analysis_kegg_get_settings() {
    // Content types on which KEGG results may be displayed.
    $allowedoptions = array(
      'chado_feature' => "Show KEGG results on feature pages",
      'chado_analysis_kegg' => "Show KEGG BRITE results on the analysis page.",
      'chado_organism' => "Show KEGG BRITE results on the organism pages.",
    );

    $form = array();
    $form['description'] = array(
      '#type' => 'item',
      '#value' => t("Some chado features were analyzed by KEGG automatic annotation server (KAAS). This option allows user to display the kegg analysis results. Please read user manual for storage and display of kegg files. Check the box to enable the analysis results. Uncheck to disable it."),
      '#weight' => 0,
    );
    $form['tripal_analysis_kegg_setting'] = array(
      '#type' => 'checkboxes',
      '#options' => $allowedoptions,
      '#default_value' => variable_get('tripal_analysis_kegg_setting',
        array('chado_feature', 'chado_analysis_kegg')),
    );

    // Instantiate the return object explicitly; assigning properties on an
    // undefined variable is an error on current PHP versions.
    $settings = new stdClass();
    $settings->form = $form;
    $settings->title = "Tripal Kegg";
    return $settings;
  }
  815. /************************************************************************
  816. */
  /**
   * Loads a node and returns its organism KEGG results.
   *
   * @param $node
   *   The value handed to node_load(); presumably a node id -- confirm
   *   against the caller.
   *
   * @return
   *   Whatever tripal_analysis_kegg_organism_add() returns for the loaded
   *   node.
   */
  function tripal_analysis_kegg_organism_results($node) {
    return tripal_analysis_kegg_organism_add(node_load($node));
  }
  821. /************************************************************************
  822. */
  /**
   * Renders the KEGG results box shown on a feature page.
   *
   * For every analysis that has a 'kegg_brite_data' property attached to the
   * feature, the analysis date and name are printed, followed by each
   * property value stored for that analysis/feature pair. The name links to
   * the analysis node when one exists.
   *
   * @param $node
   *   A feature node; the id is read from $node->feature->feature_id.
   *
   * @return
   *   The HTML of the results box, or an empty string when the feature has no
   *   KEGG results.
   */
  function tripal_analysis_kegg_feature_add($node) {
    $feature_id = $node->feature->feature_id;

    $previous_db = tripal_db_set_active('chado');

    // Find cvterm_id for 'kegg_brite_data'
    $sql = "SELECT cvterm_id
            FROM {cvterm} CVT
              INNER JOIN cv ON cv.cv_id = CVT.cv_id
            WHERE cv.name = 'tripal'
              AND CVT.name = '%s'";
    $brite_data_type_id = db_result(db_query($sql, 'kegg_brite_data'));

    // Every analysis holding KEGG data for this feature. The result set is
    // walked directly, so the query only has to run once.
    $sql = "SELECT analysis_id AS aid
            FROM {analysisfeature} AF
              INNER JOIN analysisfeatureprop AFP ON AF.analysisfeature_id = AFP.analysisfeature_id
            WHERE feature_id = %d
              AND AFP.type_id = %d
            GROUP BY analysis_id";
    $analyses = db_query($sql, $feature_id, $brite_data_type_id);

    $details_sql = "SELECT name, to_char(timeexecuted, 'MM-DD-YYYY') AS time
                    FROM {analysis}
                    WHERE analysis_id = %d";
    $values_sql = "SELECT AFP.value AS afpvalue
                   FROM {analysisfeatureprop} AFP
                     INNER JOIN analysisfeature AF on AF.analysisfeature_id = AFP.analysisfeature_id
                   WHERE AF.analysis_id = %d
                     AND AF.feature_id = %d";

    $rows = '';
    while ($ana = db_fetch_object($analyses)) {
      $ana_details = db_fetch_object(db_query($details_sql, $ana->aid));
      $ana_name = $ana_details ? check_plain($ana_details->name) : '';
      $ana_time = $ana_details ? $ana_details->time : '';

      // The chado_analysis lookup runs against the database that was active
      // before the switch to chado, so switch back just for this query.
      tripal_db_set_active($previous_db);
      $ana_nid = db_result(db_query("SELECT nid FROM {chado_analysis} WHERE analysis_id = %d", $ana->aid));
      $previous_db = tripal_db_set_active('chado');

      // Only link when the analysis has a node; otherwise show the bare name.
      if ($ana_nid) {
        $ana_label = '<a href="' . url("node/" . $ana_nid) . '">' . $ana_name . '</a>';
      }
      else {
        $ana_label = $ana_name;
      }
      $rows .= "<strong>Analysis Date:</strong> $ana_time ($ana_label)<br>";

      // Property values are emitted as stored.
      // NOTE(review): presumably they may already contain markup -- confirm
      // before escaping them.
      $kegg_results = db_query($values_sql, $ana->aid, $feature_id);
      while ($afp = db_fetch_object($kegg_results)) {
        $rows .= "$afp->afpvalue<br>";
      }
      $rows .= "<br>";
    }

    tripal_db_set_active($previous_db);

    // Without any analysis there is nothing to wrap in the results box.
    if ($rows == '') {
      return '';
    }
    return "<div id=\"tripal_kegg-hits\" class=\"tripal_kegg-info-box\">
      <table class=\"tripal_kegg_results_table\">
      <tr><td>" . $rows . '</td></tr></table></div>';
  }
  880. /************************************************************************
  881. */
  /**
   * Form builder: a select box listing the KEGG analyses of an organism.
   *
   * The options are the rows of kegg_by_organism for the node's organism,
   * newest analysis id first, preceded by an empty choice. Changing the
   * selection invokes the client-side tripal_analysis_kegg_org_report()
   * handler with the chosen analysis id.
   *
   * @param $form_state
   *   The form state; not used by this builder.
   * @param $node
   *   An organism node; the id is read from $node->organism->organism_id.
   *   When it is missing, the select box only offers the empty choice.
   *
   * @return
   *   The form array.
   */
  function tripal_analysis_kegg_select_form(&$form_state = NULL, $node = NULL){
    $form = array();

    // The first, empty entry lets the select box start without a report.
    $analyses = array('' => '');

    // find analyses that have KEGG terms
    if (isset($node->organism->organism_id)) {
      $sql = "
        SELECT *
        FROM {kegg_by_organism} KBO
        WHERE organism_id = %d
        ORDER BY analysis_id DESC
      ";
      $previous_db = tripal_db_set_active('chado');
      $results = db_query($sql, $node->organism->organism_id);
      tripal_db_set_active($previous_db);

      while ($analysis = db_fetch_object($results)) {
        $analyses[$analysis->analysis_id] = "$analysis->analysis_name";
      }
    }

    // create the select box
    $form['tripal_analysis_kegg_select'] = array(
      '#title' => t('Select a KEGG report to view'),
      '#description' => t('Any analysis with KEGG results related to this organism are available for viewing. For further information, see the analysis information page.'),
      '#type' => 'select',
      '#options' => $analyses,
      '#attributes' => array (
        'onchange' => 'tripal_analysis_kegg_org_report(this.options[this.selectedIndex].value)'
      ),
    );
    return $form;
  }