tripal_analysis_kegg.module 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254
  1. <?php
  2. /*******************************************************************************
  3. *
  4. ******************************************************************************/
  5. function tripal_analysis_kegg_init(){
  6. // add the tripal_analysis_kegg JS and CSS
  7. drupal_add_js(drupal_get_path('theme', 'tripal').'/js/tripal_analysis_kegg.js');
  8. drupal_add_css(drupal_get_path('theme', 'tripal').'/css/tripal_analysis_kegg.css');
  9. // add the jsTree JS and CSS
  10. drupal_add_css(drupal_get_path('theme', 'tripal').'/js/jsTree/source/tree_component.css');
  11. drupal_add_js (drupal_get_path('theme', 'tripal').'/js/jsTree/source/_lib.js');
  12. drupal_add_js (drupal_get_path('theme', 'tripal').'/js/jsTree/source/tree_component.js');
  13. }
  14. /*******************************************************************************
  15. * Provide information to drupal about the node types that we're creating
  16. * in this module
  17. */
  18. function tripal_analysis_kegg_node_info() {
  19. $nodes = array();
  20. $nodes['chado_analysis_kegg'] = array(
  21. 'name' => t('Analysis: KEGG'),
  22. 'module' => 'chado_analysis_kegg',
  23. 'description' => t('Results from a KEGG/KAAS analysis'),
  24. 'has_title' => FALSE,
  25. 'title_label' => t('Analysis: KEGG'),
  26. 'has_body' => FALSE,
  27. 'body_label' => t('KEGG Analysis Description'),
  28. 'locked' => TRUE
  29. );
  30. return $nodes;
  31. }
  32. /*******************************************************************************
  33. * Menu items are automatically added for the new node types created
  34. * by this module to the 'Create Content' Navigation menu item. This function
  35. * adds more menu items needed for this module.
  36. */
  37. function tripal_analysis_kegg_menu() {
  38. $items['brite/%'] = array(
  39. 'title' => t('KEGG BRITE'),
  40. 'page callback' => 'tripal_analysis_kegg_brite',
  41. 'page arguments' => array(1, 2),
  42. 'access arguments' => array('access content'),
  43. 'type' => MENU_CALLBACK
  44. );
  45. $items['tripal_analysis_kegg_org_report/%'] = array(
  46. 'path' => 'tripal_analysis_kegg_org_report',
  47. 'title' => t('Analysis KEGG report'),
  48. 'page callback' => 'tripal_analysis_kegg_org_report',
  49. 'page arguments' => array(1),
  50. 'access arguments' => array('access chado_analysis_kegg content'),
  51. 'type' => MENU_CALLBACK
  52. );
  53. return $items;
  54. }
  55. /*******************************************************************************
  56. * Set the permission types that the chado module uses. Essentially we
  57. * want permissionis that protect creation, editing and deleting of chado
  58. * data objects
  59. */
  60. function tripal_analysis_kegg_perm(){
  61. return array(
  62. 'access chado_analysis_kegg content',
  63. 'create chado_analysis_kegg content',
  64. 'delete chado_analysis_kegg content',
  65. 'edit chado_analysis_kegg content',
  66. );
  67. }
  68. /*******************************************************************************
  69. * The following function proves access control for users trying to
  70. * perform actions on data managed by this module
  71. */
  72. function chado_analysis_kegg_access($op, $node, $account){
  73. if ($op == 'create') {
  74. return user_access('create chado_analysis_kegg content', $account);
  75. }
  76. if ($op == 'update') {
  77. if (user_access('edit chado_analysis_kegg content', $account)) {
  78. return TRUE;
  79. }
  80. }
  81. if ($op == 'delete') {
  82. if (user_access('delete chado_analysis_kegg content', $account)) {
  83. return TRUE;
  84. }
  85. }
  86. if ($op == 'view') {
  87. if (user_access('access chado_analysis_kegg content', $account)) {
  88. return TRUE;
  89. }
  90. }
  91. return FALSE;
  92. }
  93. /*******************************************************************************
  94. */
  95. function tripal_analysis_kegg_brite($analysis_id, $type_id, $ajax){
  96. global $base_url;
  97. $theme_dir = drupal_get_path('theme','tripal');
  98. // If not called by ajax
  99. if (!$ajax) {
  100. $content .=
  101. "<div id=\"tripal_kegg_brite_results\" class=\"tripal_kegg_brite-info-box\">
  102. <table>
  103. <tr>
  104. <th>KEGG BRITE</th>
  105. <th id=\"tripal_kegg_brite_header\">Hierarchy:</th>
  106. </tr>
  107. <tr>
  108. <td nowrap valign=\"top\">
  109. ";
  110. // List all BRITE terms on the left
  111. $sql = "SELECT DISTINCT CVT.name, CVT.cvterm_id
  112. FROM {cvterm} CVT
  113. INNER JOIN analysisprop AP ON CVT.cvterm_id = AP.type_id
  114. WHERE AP.analysis_id = %d AND CVT.definition LIKE 'KEGG BRITE term: %'
  115. ORDER BY CVT.cvterm_id";
  116. $previous_db = tripal_db_set_active('chado');
  117. $result = db_query($sql, $analysis_id);
  118. tripal_db_set_active($previous_db);
  119. while ($brite_term = db_fetch_object($result)) {
  120. $url = url("brite/$analysis_id/$brite_term->cvterm_id/1");
  121. $content .= "<li class=\"tripal_kegg_brite_terms\"><a onclick=\"return tripal_update_brite(".
  122. "this,$brite_term->cvterm_id,'$base_url','$theme_dir')\" href=\"$url\">
  123. $brite_term->name
  124. </a></li>";
  125. }
  126. // Show the hierarchy tree
  127. $content .="</td>
  128. <td nowrap id=\"tripal_kegg_brite_hierarchy\" valign=\"top\">";
  129. $content .= "<i>Note:</i> Click a BRITE term for its functional hierarchy";
  130. // If called by ajax, generate tree structure
  131. } else {
  132. // Get BRITE term from cvterm table
  133. $previous_db = tripal_db_set_active('chado');
  134. $sql = 'SELECT name FROM {cvterm} WHERE cvterm_id=%d';
  135. $brite_term = db_result(db_query($sql, $type_id));
  136. // Get BRITE hierarchy tree
  137. $sql = "SELECT value
  138. FROM {analysisprop} AP
  139. INNER JOIN CVterm CVT on AP.type_id = CVT.cvterm_id
  140. INNER JOIN CV on CVT.cv_id = CV.cv_id
  141. WHERE CV.name = 'tripal' and CVT.name = '%s'
  142. AND AP.analysis_id = %d";
  143. $result = db_fetch_object(db_query($sql, $brite_term, $analysis_id));
  144. tripal_db_set_active($previous_db);
  145. $content .= "<div class=\"tripal_kegg_brite_tree\" id=\"tripal_kegg_brite_tree_$type_id\">$result->value</div>";
  146. }
  147. if (!$ajax) {
  148. $content .= " </td>
  149. </tr>
  150. </table>
  151. </div>";
  152. }
  153. // since this function provides output for addition into
  154. // an analysis page, as well as an AJAX refresh of content
  155. // within the BRITE hierarchy we need to setup the return
  156. // different depending on the request type
  157. if($ajax){
  158. drupal_json(array('update' => $content,
  159. 'id' => "tripal_kegg_brite_tree_$type_id",
  160. 'brite_term' => "Hierarchy: $brite_term"));
  161. } else {
  162. return $content;
  163. }
  164. }
  165. /*******************************************************************************
  166. * Provide a KEGG Analysis form
  167. */
  168. function chado_analysis_kegg_form ($node){
  169. // add in the default fields
  170. $form = chado_analysis_form($node);
  171. // set the defaults
  172. $kegg = $node->analysis->tripal_analysis_kegg;
  173. $query_re = $kegg->query_re;
  174. $query_type = $kegg->query_type;
  175. $query_uniquename = $kegg->query_uniquename;
  176. $hierfile = $kegg->hierfile;
  177. $moreSettings ['kegg'] = 'KEGG Analysis Settings';
  178. $form['kegg'] = array(
  179. '#title' => t('KEGG Settings'),
  180. '#type' => 'fieldset',
  181. '#description' => t('Specific Settings for KEGG Analysis.'),
  182. '#collapsible' => TRUE,
  183. '#attributes' => array('id' => 'kegg-extra-settings'),
  184. '#weight' => 11
  185. );
  186. $form['kegg']['hierfile'] = array(
  187. '#title' => t('KAAS hier.tar.gz Output File'),
  188. '#type' => 'textfield',
  189. '#description' => t('The full path to the hier.tar.gz file generated by KAAS.
  190. Alternatively, you can input the full path to the directory
  191. that contains decompressed kegg files.'),
  192. '#default_value' => $hierfile,
  193. );
  194. $form['kegg']['query_re'] = array(
  195. '#title' => t('Query Name RE'),
  196. '#type' => 'textfield',
  197. '#description' => t('Enter the regular expression that will extract the '.
  198. 'feature name from the results line in the KEGG heir results. This will be '.
  199. 'the same as the definition line in the query FASTA file used for the analysis. This option is '.
  200. 'is only required when the query does not identically match a feature '.
  201. 'in the database.'),
  202. '#default_value' => $query_re,
  203. );
  204. $form['kegg']['query_type'] = array(
  205. '#title' => t('Query Type'),
  206. '#type' => 'textfield',
  207. '#description' => t('Please enter the Sequence Ontology term that describes '.
  208. 'the query sequences used for KEGG. This is only necessary if two '.
  209. 'or more sequences have the same name.'),
  210. '#default_value' => $query_type,
  211. );
  212. $form['kegg']['query_uniquename'] = array(
  213. '#title' => t('Use Unique Name'),
  214. '#type' => 'checkbox',
  215. '#description' => t('Select this checboxk if the feature name in the KEGG heir file '.
  216. 'matches the uniquename in the database. By default, the feature will '.
  217. 'be mapped to the "name" of the feature.'),
  218. '#default_value' => $query_uniquename,
  219. );
  220. $form['kegg']['keggjob'] = array(
  221. '#type' => 'checkbox',
  222. '#title' => t('Submit a job to parse the kegg output into analysisfeatureprop table'),
  223. '#description' => t('Note: features associated with the KAAS results must '.
  224. 'exist in chado before parsing the file. Otherwise, KEGG '.
  225. 'results that cannot be linked to a feature will be '.
  226. 'discarded.'),
  227. );
  228. $form['kegg']['keggkeywordjob'] = array(
  229. '#type' => 'checkbox',
  230. '#title' => t('Submit a job to extract keywords from the KEGG html output'),
  231. '#description' => t('Note: KEGG results are only searchable after keywords are extracted. Do not run this twice if you have already done so.'),
  232. );
  233. return $form;
  234. }
  235. /**
  236. *
  237. */
  238. function chado_analysis_kegg_validate($node, &$form){
  239. // use the analysis parent to validate the node
  240. tripal_analysis_validate($node, $form);
  241. }
  242. /*******************************************************************************
  243. *
  244. */
  245. function chado_analysis_kegg_insert($node){
  246. // insert the analysis
  247. chado_analysis_insert($node);
  248. // set the type for this analysis
  249. tripal_analysis_insert_property($node->analysis_id,'analysis_type','tripal_analysis_kegg');
  250. // now add in the remaining settings as a single property but separated by bars
  251. tripal_analysis_insert_property($node->analysis_id,'analysis_kegg_settings',$node->hierfile);
  252. tripal_analysis_insert_property($node->analysis_id,'analysis_kegg_query_re',$node->query_re);
  253. tripal_analysis_insert_property($node->analysis_id,'analysis_kegg_query_type',$node->query_type);
  254. tripal_analysis_insert_property($node->analysis_id,'analysis_kegg_query_uniquename',$node->query_uniquename);
  255. // Add a job if the user wants to parse the html output
  256. chado_analysis_kegg_submit_job($node);
  257. }
  258. /**
  259. *
  260. */
  261. function chado_analysis_kegg_submit_job($node){
  262. global $user;
  263. if($node->keggjob) {
  264. $job_args[0] = $node->analysis_id;
  265. $job_args[1] = $node->hierfile;
  266. $job_args[2] = base_path();
  267. $job_args[3] = $node->query_re;
  268. $job_args[4] = $node->query_type;
  269. $job_args[5] = $node->query_uniquename;
  270. if (is_readable($node->hierfile)) {
  271. $fname = preg_replace("/.*\/(.*)/", "$1", $node->hierfile);
  272. tripal_add_job("Parse KAAS output: $fname",'tripal_analysis_kegg',
  273. 'tripal_analysis_kegg_parseHierFile', $job_args, $user->uid);
  274. } else {
  275. drupal_set_message("Can not open KAAS hier.tar.gz output file. Job not scheduled.");
  276. }
  277. }
  278. // Add a job if the user wants to the keywords from the HTML output
  279. if ($node->keggkeywordjob) {
  280. $analysis_id =chado_get_id_for_node('analysis', $node);
  281. $job_args[0] = $analysis_id;
  282. tripal_add_job("Extract keywords for search: $node->analysisname",'tripal_analysis_kegg',
  283. 'tripal_analysis_kegg_extract_keywords', $job_args, $user->uid);
  284. }
  285. }
  286. /*******************************************************************************
  287. * Delete KEGG anlysis
  288. */
  289. function chado_analysis_kegg_delete($node){
  290. chado_analysis_delete($node);
  291. }
  292. /*******************************************************************************
  293. * Update KEGG analysis
  294. */
  295. function chado_analysis_kegg_update($node){
  296. // insert the analysis
  297. chado_analysis_update($node);
  298. // set the type for this analysis
  299. tripal_analysis_update_property($node->analysis_id,'analysis_type','tripal_analysis_kegg',1);
  300. // now add in the remaining settings as a single property but separated by bars
  301. tripal_analysis_update_property($node->analysis_id,'analysis_kegg_settings',$node->hierfile,1);
  302. tripal_analysis_update_property($node->analysis_id,'analysis_kegg_query_re',$node->query_re,1);
  303. tripal_analysis_update_property($node->analysis_id,'analysis_kegg_query_type',$node->query_type,1);
  304. tripal_analysis_update_property($node->analysis_id,'analysis_kegg_query_uniquename',$node->query_uniquename,1);
  305. // Add a job if the user wants to parse the html output
  306. chado_analysis_kegg_submit_job($node);
  307. }
  308. /*******************************************************************************
  309. * When a node is requested by the user this function is called to allow us
  310. * to add auxiliary data to the node object.
  311. */
  312. function chado_analysis_kegg_load($node){
  313. // load the default set of analysis fields
  314. $additions = chado_analysis_load($node);
  315. // create some variables for easier lookup
  316. $analysis = $additions->analysis;
  317. $analysis_id = $analysis->analysis_id;
  318. // get the heirfile name
  319. $hierfile = tripal_analysis_get_property($analysis_id,'analysis_kegg_settings');
  320. $query_re = tripal_analysis_get_property($analysis->analysis_id,'analysis_kegg_query_re');
  321. $query_type = tripal_analysis_get_property($analysis->analysis_id,'analysis_kegg_query_type');
  322. $query_uniquename= tripal_analysis_get_property($analysis->analysis_id,'analysis_kegg_query_uniquename');
  323. $analysis->tripal_analysis_kegg->hierfile = $hierfile->value;
  324. $analysis->tripal_analysis_kegg->query_re = $query_re->value;
  325. $analysis->tripal_analysis_kegg->query_type = $query_type->value;
  326. $analysis->tripal_analysis_kegg->query_uniquename= $query_uniquename->value;
  327. return $additions;
  328. }
  329. /**
  330. *
  331. */
  332. function chado_analysis_kegg_view ($node, $teaser = FALSE, $page = FALSE) {
  333. // use drupal's default node view:
  334. if (!$teaser) {
  335. $node = node_prepare($node, $teaser);
  336. // When previewing a node submitting form, it shows 'Array' instead of
  337. // correct date format. We need to format the date here
  338. $time = $node->timeexecuted;
  339. if(is_array($time)){
  340. $month = $time['month'];
  341. $day = $time['day'];
  342. $year = $time['year'];
  343. $timestamp = $year.'-'.$month.'-'.$day;
  344. $node->timeexecuted = $timestamp;
  345. }
  346. }
  347. return $node;
  348. }
  349. /********************************************************************************
  350. */
  351. function tripal_analysis_kegg_parseHierFile ($analysis_id, $hierfile, $base_path,
  352. $query_re,$query_type,$query_uniquename,$job_id) {
  353. // If user input a file (e.g. hier.tar.gz), decompress it first
  354. if (is_file($hierfile)) {
  355. // generate a unique directory name for extracting and parsing the file
  356. $data_dir = sys_get_temp_dir() . "/" . uniqid();
  357. mkdir($data_dir);
  358. $stderr = shell_exec("cd $data_dir; tar -zxf $hierfile;");
  359. print "$stderr\n";
  360. $hierdir = $data_dir . "/hier";
  361. }
  362. // Otherwise, treat it as a directory
  363. else {
  364. $hierdir = $hierfile;
  365. }
  366. $dir_handle = @opendir($hierdir) or die("Unable to open $hierdir");
  367. $total_files = count(glob($hierdir . '/*.*'));
  368. print "There are $total_files keg file(s).\n";
  369. $interval = intval($total_files * 0.01);
  370. $no_file = 0;
  371. // Remove the analysis feature this analysis
  372. // we will rebuild them from just this parsing
  373. $select = array('analysis_id' => $analysis_id);
  374. if(!tripal_core_chado_delete('analysisfeature',$select)){
  375. print "ERROR: Cannot prepare the analysis for adding features\n";
  376. exit;
  377. }
  378. while ($file = readdir($dir_handle)) {
  379. if(preg_match("/^.*\.keg/",$file)){
  380. // Update the progress
  381. if ($no_file % $interval == 0) {
  382. $percentage = (int) ($no_file / $total_files * 100);
  383. tripal_job_set_progress($job_id, $percentage);
  384. print $percentage."% ";
  385. }
  386. $no_file ++;
  387. # $heirarchy variable will be set in tripal_analysis_kegg_parse_kegg_file()
  388. $results = tripal_analysis_kegg_parse_kegg_file("$hierdir/$file",$heirarchy,
  389. $analysis_id, $base_path, $query_re,$query_type,$query_uniquename);
  390. # add the item to the database
  391. if(count($results) > 0){
  392. //------------------------------------------------------
  393. // Insert into analysisprop table
  394. //------------------------------------------------------
  395. // Remove the property if it already exists we'll replace it
  396. $sql = "DELETE
  397. FROM {analysisprop}
  398. WHERE analysis_id = %d
  399. AND type_id = (SELECT cvterm_id
  400. FROM {cvterm} CVT
  401. INNER JOIN CV ON CVT.cv_id = CV.cv_id
  402. WHERE CV.name = 'tripal' AND CVT.name = '%s'
  403. )
  404. ";
  405. $previous_db = tripal_db_set_active('chado');
  406. db_result(db_query($sql, $analysis_id, $heirarchy));
  407. tripal_db_set_active($previous_db);
  408. $previous_db = tripal_db_set_active('chado'); // Use chado database
  409. // Get type_id for the BRITE term
  410. $sql = "SELECT cvterm_id
  411. FROM {cvterm} CVT
  412. INNER JOIN CV ON CVT.cv_id = CV.cv_id
  413. WHERE CV.name = 'tripal' AND CVT.name = '%s'";
  414. $brite_cvterm_id = db_result(db_query($sql, $heirarchy));
  415. // convert the array to text for saving in the database
  416. // Replace all single quote as HTML code before insert
  417. $i = 0;
  418. $content = '<ul>';
  419. tripal_analysis_kegg_array2list($results,$content,$i);
  420. $content .= '</ul>';
  421. $content = preg_replace("/\'/", "&#39;", $content);
  422. // Insert the value
  423. $sql = "INSERT INTO {analysisprop} (analysis_id, type_id, value)
  424. VALUES (%d, %d,'$content')";
  425. db_query($sql, $analysis_id, $brite_cvterm_id);
  426. tripal_db_set_active($previous_db); // Use drupal database
  427. }
  428. }
  429. }
  430. print "Done.\n";
  431. tripal_job_set_progress($job_id,100);
  432. closedir($dir_handle);
  433. // If user input a file, remove decompressed files after parsing
  434. if (is_file($hierfile)) {
  435. $stderr = shell_exec("rm -r $data_dir;");
  436. print "$stderr\n";
  437. }
  438. return;
  439. }
  440. /**
  441. *
  442. */
  443. function tripal_analysis_kegg_array2list($array,&$content,&$i){
  444. foreach($array as $index => $item){
  445. if(is_array($item)){
  446. if(is_numeric($index)){
  447. tripal_analysis_kegg_array2list($item,$content,$i);
  448. } else {
  449. $content .= "<li id=\"term_$i\"><a></a>$index\n<ul>";
  450. $i++;
  451. tripal_analysis_kegg_array2list($item,$content,$i);
  452. $content .= "</ul>\n</li>\n";
  453. }
  454. } else {
  455. $content .= "<li id=\"term_$i\"><a></a>$item</li>\n";
  456. $i++;
  457. }
  458. }
  459. }
  460. /*******************************************************************************
  461. * Parse *.keg files.
  462. * Each file has a definition line. BRITE term is extracted * from this line
  463. * and added to chado as a cvterm. Tree structure for this cvterm is then
  464. * generated and saved to analysisfeature and analysisfeatureprop tables.
  465. */
  466. function tripal_analysis_kegg_parse_kegg_file ($file, &$heirarchy, $analysis_id,
  467. $base_path, $query_re, $query_type, $query_uniquename)
  468. {
  469. print "Parsing $file\n";
  470. // get the 'kegg_brite_data' cvterm
  471. $select = array('name' => 'kegg_brite_data',
  472. 'cv_id' => array('name' => 'tripal'));
  473. $bdt_arr = tripal_core_chado_select('cvterm',array('cvterm_id'),$select);
  474. $brite_id = $bdt_arr[0]->cvterm_id;
  475. $filepos = 0;
  476. // iterate through the lines of the file and recurse through the various levels
  477. $handle = fopen($file,'r');
  478. while($line = fgets($handle)){
  479. $filepos += strlen($line);
  480. $line = trim($line);
  481. // the first line of the file provides the BRITE heirarchy name
  482. if(preg_match("/#.*nbsp;\s(.*)<\/h2>$/",$line,$matches)){
  483. // For each BRITE heirarchy file we'll add an analysisprop where we'll
  484. // store the report. If the CVTerm doesn't exist then add it.
  485. $heirarchy = $matches[1];
  486. $select = array('name' => $heirarchy,'cv_id' => array('name' => 'tripal'));
  487. $cvt_arr = tripal_core_chado_select('cvterm',array('cvterm_id'),$select);
  488. if (count($cvt_arr) == 0) {
  489. tripal_add_cvterms($type, "KEGG BRITE term: $type");
  490. $cvt_arr = tripal_core_chado_select('cvterm',array('cvterm_id'),$select);
  491. }
  492. $heirarchy_id = $cvt_arr[0]->cvterm_id;
  493. // now that we have the file type we can recurse
  494. $next = tripal_analysis_kegg_get_next_line($handle,$filepos);
  495. $results = tripal_analysis_kegg_recurse_heir($handle,$next,$query_re,
  496. $query_type,$query_uniquename,$base_path,$analysis_id,
  497. $brite_id,$heirarchy,$filepos);
  498. }
  499. }
  500. fclose($handle);
  501. return $results;
  502. }
/**
 * Recursively walks the level-prefixed lines of a *.keg file.
 *
 * Starting from $line, lines are read with
 * tripal_analysis_kegg_get_next_line(). A line whose level letter sorts
 * after the current one is handled by a nested call; a line whose level
 * letter sorts before it rewinds the file handle and ends this call; a line
 * on the same level becomes the new current line.
 *
 * @param $handle
 *   Open file handle of the *.keg file.
 * @param $line
 *   The current line as array(level, value), as returned by
 *   tripal_analysis_kegg_get_next_line().
 * @param $query_re
 *   Passed through to tripal_analysis_kegg_check_line_handle_feature().
 * @param $query_type
 *   Passed through to tripal_analysis_kegg_check_line_handle_feature().
 * @param $query_uniquename
 *   Passed through to tripal_analysis_kegg_check_line_handle_feature().
 * @param $base_path
 *   Passed through to tripal_analysis_kegg_check_line_handle_feature().
 * @param $analysis_id
 *   Passed through to tripal_analysis_kegg_check_line_handle_feature().
 * @param $brite_id
 *   Passed through to tripal_analysis_kegg_check_line_handle_feature().
 * @param $heirarchy
 *   Passed through to tripal_analysis_kegg_check_line_handle_feature().
 * @param $filepos
 *   Byte offset consumed so far, by reference. Updated as lines are read and
 *   reset when the handle is rewound.
 *
 * @return
 *   An array whose entries are either the value returned for a matched line
 *   or a one-element array mapping a parent line's value to the results of
 *   its nested call.
 */
function tripal_analysis_kegg_recurse_heir($handle, $line,$query_re,
$query_type, $query_uniquename, $base_path, $analysis_id, $brite_id,
$heirarchy,&$filepos)
{
  $results = array();
  // get the current level and the value
  $level = $line[0];
  $value = $line[1];
  // now get the next line to see what is coming next. If the
  // next level is greater then recurse immediately.
  // $prevpos remembers where the handle stood before the look-ahead read so
  // that the read can be undone.
  $prevpos = $filepos;
  while($next = tripal_analysis_kegg_get_next_line($handle,$filepos)){
    $next_level = $next[0];
    $next_value = $next[1];
    // check the current line ($value, not the look-ahead line) to see if it
    // has a feature we need to keep
    // NOTE(review): this runs once per loop iteration, so a line that has
    // child levels is checked again after its nested call returns, and the
    // line that is current when no further line can be read is never checked
    // in this call - confirm both are intended.
    $ret = tripal_analysis_kegg_check_line_handle_feature($query_re,
    $query_type, $query_uniquename, $base_path, $analysis_id, $brite_id,
    $heirarchy,$value);
    if($ret){
      $results[] = $ret;
    }
    // if the next line's level letter sorts after the current one then
    // recurse immediately and add the results to our array
    if(ord($next_level) > ord($level)){
      // now recurse
      $ret = tripal_analysis_kegg_recurse_heir($handle,$next,
      $query_re,$query_type, $query_uniquename, $base_path, $analysis_id,
      $brite_id,$heirarchy,$filepos);
      if(count($ret) > 0){
        // the nested results are filed under the current line's value
        $results[][$value] = $ret;
      }
    }
    // if the next line's level letter sorts before the current one then
    // reset the filepointer (so the caller reads that line again) and return
    elseif(ord($next_level) < ord($level)){
      fseek($handle,$prevpos);
      $filepos = $prevpos;
      return $results;
    }
    // same level: the look-ahead line becomes the current line
    else {
      $line = $next;
      $level = $next[0];
      $value = $next[1];
    }
    $prevpos = $filepos;
  }
  return $results;
}
  553. /**
  554. *
  555. */
  556. function tripal_analysis_kegg_get_next_line($handle,&$filepos){
  557. $good = 0;
  558. // get the next line in the file
  559. $line = fgets($handle);
  560. $filepos += strlen($line);
  561. // we hit the end of the file, so exit with a null
  562. if(!$line){
  563. return null;
  564. }
  565. while(!$good){
  566. $line = trim($line);
  567. preg_match("/^([ABCDEFGHIJKLMNOP])\s*(.*)/",$line,$matches);
  568. $level = $matches[1];
  569. $value = $matches[2];
  570. // skip lines that aren't data or are empty
  571. if($level and $value) {
  572. // change all relative paths to absolute paths pointing to KEGG (www.genome.jp)
  573. // add id to <a> tags so we can link kegg.gif to it in tripal_analysis_kegg.css
  574. $value = preg_replace("/<a href=\"\//i","<a href=\"http://www.genome.jp/",$value);
  575. $value = preg_replace("/<a href=\"/i","<a id=\"tripal_kegg_brite_links\" target=\"_blank\" href=\"",$value);
  576. // this line is good so let's exit out
  577. $good = 1;
  578. } else {
  579. $line = fgets($handle);
  580. $filepos += strlen($line);
  581. // we hit the end of the file, so exit with a null
  582. if(!$line){
  583. return null;
  584. }
  585. }
  586. }
  587. return array($level,$value);
  588. }
  589. /**
  590. *
  591. */
  592. function tripal_analysis_kegg_check_line_handle_feature($query_re,
  593. $query_type, $query_uniquename, $base_path, $analysis_id, $brite_id,
  594. $heirarchy,$value)
  595. {
  596. // extract the features that have been mapped to the KEGG IDs
  597. if(preg_match("/^(.*?);\s*(\<a.+)/",$value,$matches)){
  598. $has_feature = 1;
  599. $fname = $matches[1];
  600. $keggterm = $matches[2];
  601. // get the feature name using the user's regular expression
  602. if ($query_re and preg_match("/$query_re/", $fname, $matches)) {
  603. $feature = $matches[1];
  604. }
  605. // If not in above format then pull up to the first space
  606. else {
  607. if (preg_match('/^(.*?)\s.*$/', $fname, $matches)) {
  608. $feature = $matches[1];
  609. }
  610. // if no match up to the first space then just use the entire string
  611. else {
  612. $feature = $fname;
  613. }
  614. }
  615. // now find the feature in chado
  616. $select = array();
  617. if($query_uniquename){
  618. $select['uniquename'] = $feature;
  619. } else {
  620. $select['name'] = $feature;
  621. }
  622. if($query_type){
  623. $select['type_id'] = array(
  624. 'cv_id' => array(
  625. 'name' => 'sequence'
  626. ),
  627. 'name' => $query_type,
  628. );
  629. }
  630. $feature_arr = tripal_core_chado_select('feature',array('feature_id'),$select);
  631. if(count($feature_arr) > 1){
  632. print "Ambiguous: '$feature' matches more than one feature and is being skipped.\n";
  633. continue;
  634. }
  635. if(count($feature_arr) == 0){
  636. print "Failed: '$feature' cannot find a matching feature in the database.\n";
  637. continue;
  638. }
  639. $feature_id = $feature_arr[0]->feature_id;
  640. if($feature_id){
  641. print "Adding KEGG term for $feature ($feature_id,$analysis_id). $heirarchy\n";
  642. // add this term to the analysis feature properties
  643. tripal_analysis_kegg_insert_featureprop($feature_id,$analysis_id,
  644. $brite_id,$keggterm);
  645. // get the node ID of the feature if one exists
  646. $sql = "SELECT nid FROM {chado_feature} WHERE feature_id = %d";
  647. $nid = db_result(db_query($sql, $feature_id));
  648. // Add link to each matched feature
  649. if($nid){
  650. $value = preg_replace("/^(.*?)(;\s*\<a)/","<a id=\"tripal_kegg_feature_links\" target=\"_blank\" href=\"".url("node/$nid")."\">"."$1"."</a>"."$2",$value);
  651. }
  652. // if we have a feature match then add this to our results array
  653. return $value;
  654. }
  655. }
  656. return null;
  657. }
  /**
   * Stores a KEGG term for a feature within an analysis.
   *
   * Makes sure an analysisfeature record links the feature to the analysis
   * (creating it when missing) and then inserts the term into
   * analysisfeatureprop using the next unused rank for that
   * analysisfeature/type pair.
   *
   * @param $feature_id
   *   The feature_id of the feature the term belongs to.
   * @param $analysis_id
   *   The analysis_id of the analysis that produced the term.
   * @param $brite_id
   *   The value stored in analysisfeatureprop.type_id.
   * @param $keggterm
   *   The value stored in analysisfeatureprop.value.
   *
   * @return
   *   Whatever tripal_core_chado_insert() returns for the analysisfeatureprop
   *   insert, or 0 when no analysisfeature record could be found or created.
   */
  function tripal_analysis_kegg_insert_featureprop ($feature_id, $analysis_id,
    $brite_id,$keggterm)
  {
    // add the analysisfeature record if it doesn't already exist.
    $values = array('feature_id' => $feature_id, 'analysis_id' => $analysis_id);
    $analysisfeature_arr = tripal_core_chado_select('analysisfeature',
      array('analysisfeature_id'), $values);
    if (count($analysisfeature_arr) == 0) {
      tripal_core_chado_insert('analysisfeature', $values);
      $analysisfeature_arr = tripal_core_chado_select('analysisfeature',
        array('analysisfeature_id'), $values);
    }

    // without an analysisfeature record there is nothing to attach the term to
    if (empty($analysisfeature_arr[0]->analysisfeature_id)) {
      return 0;
    }
    $analysisfeature_id = $analysisfeature_arr[0]->analysisfeature_id;

    // Get the highest rank for this analysisfeature/type pair
    $sql = "SELECT rank FROM analysisfeatureprop WHERE analysisfeature_id = %d and type_id = %d ORDER BY rank DESC";
    $previous_db = tripal_db_set_active('chado');
    $result = db_fetch_object(db_query($sql, $analysisfeature_id, $brite_id));
    // restore the connection that was active before (the original passed an
    // undefined variable here)
    tripal_db_set_active($previous_db);

    // KEGG heir results sometimes have the same record more than once, so
    // every insert takes the next free rank. An existing rank of 0 must also
    // advance the counter, otherwise the second term would reuse rank 0.
    $rank = 0;
    if ($result) {
      $rank = $result->rank + 1;
    }

    $values = array(
      'analysisfeature_id' => $analysisfeature_id,
      'type_id' => $brite_id,
      'value' => $keggterm,
      'rank' => $rank,
    );
    return tripal_core_chado_insert('analysisfeatureprop', $values);
  }
  /**
   * Implementation of hook_block().
   *
   * Declares three KEGG blocks and renders them on node pages.
   *
   * @param $op
   *   'list' returns the block definitions; 'view' returns a rendered block.
   * @param $delta
   *   Which block to render for 'view': 'featurekegg', 'orgkegg' or
   *   'kegg_base'.
   * @param $edit
   *   Unused.
   *
   * @return
   *   For 'list' an array of block definitions. For 'view' an array with
   *   'subject' and 'content' (empty for an unknown delta) when the user has
   *   the 'access chado_analysis_kegg content' permission and the current path
   *   is node/<number>; nothing otherwise.
   *
   * @ingroup tripal_analysis_kegg
   */
  function tripal_analysis_kegg_block($op = 'list', $delta = 0, $edit=array()){
    switch ($op) {
      case 'list':
        $blocks = array();
        $blocks['featurekegg']['info'] = t('Tripal Feature KEGG Terms');
        $blocks['featurekegg']['cache'] = BLOCK_NO_CACHE;

        $blocks['orgkegg']['info'] = t('Organism KEGG Summary');
        $blocks['orgkegg']['cache'] = BLOCK_NO_CACHE;

        $blocks['kegg_base']['info'] = t('Analysis: KEGG Details');
        $blocks['kegg_base']['cache'] = BLOCK_NO_CACHE;
        return $blocks;

      case 'view':
        if (user_access('access chado_analysis_kegg content') and arg(0) == 'node' and is_numeric(arg(1))) {
          $nid = arg(1);
          $node = node_load($nid);

          $block = array();
          switch ($delta) {
            case 'featurekegg':
              $block['subject'] = t('KEGG Terms');
              $block['content'] = theme('tripal_feature_kegg_terms', $node);
              break;
            case 'orgkegg':
              $block['subject'] = t('KEGG Terms');
              $block['content'] = theme('tripal_organism_kegg_summary', $node);
              break;
            case 'kegg_base':
              $block['subject'] = t('KEGG Terms');
              $block['content'] = theme('tripal_analysis_kegg_report', $node);
              break;
            default:
              // unknown delta: fall through and return the empty block
          }
          return $block;
        }
    }
  }
  /*******************************************************************************
   * HOOK: Implementation of hook_nodeapi()
   *
   * When a node is viewed, attaches themed KEGG content to $node->content:
   *  - while building the search index: 'tripal_analysis_kegg_search_index'
   *  - while building a search result: 'tripal_analysis_kegg_search_result'
   *  - otherwise, for chado_organism nodes the organism KEGG summary and for
   *    chado_feature nodes the feature KEGG terms.
   * Any other operation is ignored.
   */
  function tripal_analysis_kegg_nodeapi(&$node, $op, $teaser, $page) {
    if ($op != 'view') {
      return;
    }

    // content used for indexing the node
    if ($node->build_mode == NODE_BUILD_SEARCH_INDEX) {
      $indexed = array();
      $indexed['#value'] = theme('tripal_analysis_kegg_search_index', $node);
      $indexed['#weight'] = 6;
      $node->content['tripal_analysis_kegg_search_index'] = $indexed;
      return;
    }

    // content shown as part of a search result
    if ($node->build_mode == NODE_BUILD_SEARCH_RESULT) {
      $shown = array();
      $shown['#value'] = theme('tripal_analysis_kegg_search_result', $node);
      $shown['#weight'] = 6;
      $node->content['tripal_analysis_kegg_search_result'] = $shown;
      return;
    }

    // regular page view: pick the template that matches the node type
    if ($node->type == 'chado_organism') {
      // KEGG summary on an organism page
      $node->content['tripal_organism_kegg_summary'] = array(
        '#value' => theme('tripal_organism_kegg_summary', $node),
      );
    }
    elseif ($node->type == 'chado_feature') {
      // KEGG terms on a feature page
      $node->content['tripal_feature_kegg_terms'] = array(
        '#value' => theme('tripal_feature_kegg_terms', $node),
      );
    }
  }
  /************************************************************************
   * Implementation of hook_theme().
   *
   * We need to let drupal know about our theme functions and their arguments.
   * We create theme functions to allow users of the module to customize the
   * look and feel of the output generated in this module.
   *
   * Every hook takes a single 'node' argument defaulting to NULL. The two
   * search hooks have no 'template' entry; the other three name a template
   * file.
   *
   * @return
   *   An array of theme hook definitions keyed by hook name.
   */
  function tripal_analysis_kegg_theme () {
    return array(
      'tripal_analysis_kegg_search_index' => array(
        // 'arguments' maps argument name => default value
        'arguments' => array('node' => NULL),
      ),
      'tripal_analysis_kegg_search_result' => array(
        'arguments' => array('node' => NULL),
      ),
      'tripal_organism_kegg_summary' => array(
        'arguments' => array('node' => NULL),
        'template' => 'tripal_organism_kegg_summary',
      ),
      'tripal_feature_kegg_terms' => array(
        'arguments' => array('node' => NULL),
        'template' => 'tripal_feature_kegg_terms',
      ),
      'tripal_analysis_kegg_report' => array(
        'arguments' => array('node' => NULL),
        'template' => 'tripal_analysis_kegg_report',
      ),
    );
  }
  /**
   * Adds the KEGG report to the analysis object of a KEGG analysis node.
   *
   * Only acts when the first suggested template file is
   * 'node-chado_analysis_kegg'; the report is then stored in
   * $variables['node']->analysis->tripal_analysis_kegg->kegg_report.
   *
   * @param $variables
   *   The template variables, passed by reference.
   *
   * @ingroup tripal_analysis_kegg
   */
  function tripal_analysis_kegg_preprocess(&$variables){
    // nothing to do unless the default KEGG analysis node template is in use;
    // the isset() avoids an undefined index when there are no suggestions
    if (!isset($variables['template_files'][0]) or
        $variables['template_files'][0] != 'node-chado_analysis_kegg') {
      return;
    }

    $analysis = $variables['node']->analysis;
    $report = tripal_analysis_kegg_full_report($analysis->analysis_id);

    // create the container explicitly rather than relying on PHP creating
    // an object out of an unset property
    if (!isset($analysis->tripal_analysis_kegg)) {
      $analysis->tripal_analysis_kegg = new stdClass();
    }
    $analysis->tripal_analysis_kegg->kegg_report = $report;
  }
  /*******************************************************************************
   * Preprocess function for the tripal_organism_kegg_summary template.
   *
   * Stores the result of tripal_analysis_kegg_load_organism_kegg_summary() in
   * $variables['node']->organism->tripal_analysis_kegg->select_form.
   *
   * @param $variables
   *   The template variables, passed by reference; 'node' must be set.
   */
  function tripal_analysis_kegg_preprocess_tripal_organism_kegg_summary(&$variables){
    $node = $variables['node'];
    $organism = $node->organism;

    // create the container explicitly rather than relying on PHP creating
    // an object out of an unset property
    if (!isset($organism->tripal_analysis_kegg)) {
      $organism->tripal_analysis_kegg = new stdClass();
    }
    $organism->tripal_analysis_kegg->select_form = tripal_analysis_kegg_load_organism_kegg_summary($node);
  }
  /**
   * Preprocess function for the tripal_feature_kegg_terms template.
   *
   * Stores the result of tripal_analysis_kegg_load_feature_terms() in
   * $variables['node']->feature->tripal_analysis_kegg->results.
   *
   * @param $variables
   *   The template variables, passed by reference; 'node' must be set.
   */
  function tripal_analysis_kegg_preprocess_tripal_feature_kegg_terms(&$variables){
    $node = $variables['node'];
    $feature = $node->feature;

    // create the container explicitly rather than relying on PHP creating
    // an object out of an unset property
    if (!isset($feature->tripal_analysis_kegg)) {
      $feature->tripal_analysis_kegg = new stdClass();
    }
    $feature->tripal_analysis_kegg->results = tripal_analysis_kegg_load_feature_terms($feature);
  }
  /************************************************************************
   * Builds the plain text that is added to the search index for a feature.
   *
   * For every analysis that has a 'kegg_brite_data' property for the feature,
   * the analysis name is emitted followed by all analysisfeatureprop values
   * stored for that analysis/feature pair, separated by spaces.
   *
   * @param $node
   *   The node being indexed. Only 'chado_feature' nodes produce output.
   *
   * @return
   *   The text to index; an empty string when there is nothing to index.
   */
  function theme_tripal_analysis_kegg_search_index($node){
    // always hand back a string, also when nothing is found
    $content = '';
    if ($node->type != 'chado_feature') {
      return $content;
    }
    $feature_id = $node->feature->feature_id;

    $previous_db = tripal_db_set_active('chado');

    // Find cvterm_id for 'kegg_brite_data'
    $sql = "SELECT cvterm_id
            FROM {cvterm} CVT
              INNER JOIN cv ON cv.cv_id = CVT.cv_id
            WHERE cv.name = 'tripal'
              AND CVT.name = '%s'";
    $brite_id = db_result(db_query($sql, 'kegg_brite_data'));

    // Get the analyses with KEGG results for this feature. The query is run
    // once; an empty result simply skips the loop below.
    $sql = "SELECT analysis_id AS aid
            FROM {analysisfeature} AF
              INNER JOIN analysisfeatureprop AFP ON AF.analysisfeature_id = AFP.analysisfeature_id
            WHERE feature_id = %d
              AND AFP.type_id = %d
            GROUP BY analysis_id";
    $analyses = db_query($sql, $feature_id, $brite_id);

    $name_sql = "SELECT name
                 FROM {analysis}
                 WHERE analysis_id = %d";
    $value_sql = "SELECT AFP.value AS afpvalue
                  FROM {analysisfeatureprop} AFP
                    INNER JOIN analysisfeature AF on AF.analysisfeature_id = AFP.analysisfeature_id
                  WHERE AF.analysis_id = %d
                    AND AF.feature_id = %d";

    while ($ana = db_fetch_object($analyses)) {
      // keep the last value of the previous analysis and this analysis name
      // apart so they are indexed as separate words
      if ($content != '') {
        $content .= ' ';
      }
      $content .= db_result(db_query($name_sql, $ana->aid));

      // Add the Kegg results
      $kegg_results = db_query($value_sql, $ana->aid, $feature_id);
      while ($afp = db_fetch_object($kegg_results)) {
        $content .= " $afp->afpvalue";
      }
    }

    tripal_db_set_active($previous_db);
    return $content;
  }
  /************************************************************************
   * Theme function for KEGG content shown in a search result.
   *
   * Delegates directly to theme_tripal_analysis_kegg_node_add().
   *
   * @param $node
   *   The node being rendered.
   *
   * @return
   *   Whatever theme_tripal_analysis_kegg_node_add() returns for the node.
   */
  function theme_tripal_analysis_kegg_search_result($node){
    return theme_tripal_analysis_kegg_node_add($node);
  }
  /************************************************************************
   * Collects what the organism KEGG summary template needs.
   *
   * @param $node
   *   An organism node; $node->organism->organism_id is used for the lookup.
   *
   * @return
   *   An array with
   *   - 'has_results': 1 when kegg_by_organism holds at least one row for the
   *     organism, 0 otherwise.
   *   - 'form': the output of drupal_get_form() for
   *     tripal_analysis_kegg_select_form.
   */
  function tripal_analysis_kegg_load_organism_kegg_summary($node) {
    $organism_id = $node->organism->organism_id;

    // look for analyses that have KEGG terms; one row is enough to know
    $query = "
      SELECT *
      FROM {kegg_by_organism} KBO
      WHERE organism_id = %d
      ORDER BY analysis_id DESC
    ";
    $original_db = tripal_db_set_active('chado');
    $first_row = db_fetch_object(db_query($query, $organism_id));
    tripal_db_set_active($original_db);

    $summary = array();
    $summary['has_results'] = $first_row ? 1 : 0;
    $summary['form'] = drupal_get_form('tripal_analysis_kegg_select_form', $node);
    return $summary;
  }
  916. /************************************************************************
  917. // Show Kegg additional information on a KEGG Analysis page
  918. if ($node->type == 'chado_analysis_kegg') {
  919. return tripal_analysis_kegg_full_report($node->analysis_id);
  920. }
  921. // Show Kegg-info-box on a Feature page
  922. else if ($node->type == 'chado_feature') {
  923. return tripal_analysis_kegg_feature_add($node);
  924. }
  925. return $content;
  926. }
  927. */
  /**
   * Passes the full KEGG report of an analysis to drupal_json().
   *
   * @param $analysis_id
   *   The analysis_id of the analysis to report on.
   *
   * @return
   *   Whatever drupal_json() returns for a one-element array holding the
   *   report.
   */
  function tripal_analysis_kegg_org_report($analysis_id){
    $report = tripal_analysis_kegg_full_report($analysis_id);
    return drupal_json(array($report));
  }
  /************************************************************************
   * Builds the KEGG report for an analysis.
   *
   * Checks whether the analysis has at least one analysisprop whose cvterm
   * definition starts with 'KEGG BRITE term: '. If so the report produced by
   * tripal_analysis_kegg_brite() is returned, otherwise a note telling the
   * user that the results have not been parsed yet.
   *
   * @param $analysis_id
   *   The analysis_id of the analysis to report on.
   *
   * @return
   *   The report markup, or the "not available" note.
   */
  function tripal_analysis_kegg_full_report($analysis_id){
    // Test if brite data have been parsed into the database
    $sql = "SELECT CVT.name, CVT.cvterm_id
            FROM {cvterm} CVT
              INNER JOIN analysisprop AP ON CVT.cvterm_id = AP.type_id
            WHERE AP.analysis_id = %d
              AND CVT.definition LIKE 'KEGG BRITE term: %'
            ORDER BY CVT.cvterm_id";
    $previous_db = tripal_db_set_active('chado');
    $has_brite_terms = db_result(db_query($sql, $analysis_id));
    tripal_db_set_active($previous_db);

    if ($has_brite_terms) {
      // The original code passed an undefined $type_id, i.e. NULL plus a PHP
      // notice. Pass NULL explicitly to keep the behavior without the notice.
      // NOTE(review): confirm which type_id tripal_analysis_kegg_brite()
      // expects when no specific BRITE term is requested.
      $type_id = NULL;
      $content = tripal_analysis_kegg_brite($analysis_id, $type_id, 0);
    }
    else {
      $content = "<i>Note:</i> Analysis result is not available. Please schedule and run the job to parse the kegg output.";
    }
    return $content;
  }
  /*******************************************************************************
   * Tripal Kegg administrative setting form. This function is called by
   * tripal_analysis module which asks for an admin form to show on the page
   *
   * @return
   *   An object with two properties:
   *   - form: a form array holding a description item and the
   *     'tripal_analysis_kegg_setting' checkboxes.
   *   - title: the title of the settings section ("Tripal Kegg").
   */
  function tripal_analysis_kegg_get_settings() {
    // content types on which KEGG results can be shown
    $allowedoptions = array(
      'chado_feature' => "Show KEGG results on feature pages",
      'chado_analysis_kegg' => "Show KEGG BRITE results on the analysis page.",
      'chado_organism' => "Show KEGG BRITE results on the organism pages.",
    );

    $form = array();
    $form['description'] = array(
      '#type' => 'item',
      '#value' => t("Some chado features were analyzed by KEGG automatic annotation server (KAAS). This option allows user to display the kegg analysis results. Please read user manual for storage and display of kegg files. Check the box to enable the analysis results. Uncheck to disable it."),
      '#weight' => 0,
    );
    $form['tripal_analysis_kegg_setting'] = array(
      '#type' => 'checkboxes',
      '#options' => $allowedoptions,
      '#default_value' => variable_get('tripal_analysis_kegg_setting',
        array('chado_feature', 'chado_analysis_kegg')),
    );

    // create the return object explicitly instead of assigning properties
    // to an undefined variable
    $settings = new stdClass();
    $settings->form = $form;
    $settings->title = "Tripal Kegg";
    return $settings;
  }
  /************************************************************************
   * Loads a node and hands it to tripal_analysis_kegg_organism_add().
   *
   * @param $node
   *   The value passed on to node_load().
   *
   * @return
   *   Whatever tripal_analysis_kegg_organism_add() returns for the loaded node.
   */
  function tripal_analysis_kegg_organism_results($node) {
    $loaded_node = node_load($node);
    return tripal_analysis_kegg_organism_add($loaded_node);
  }
  /************************************************************************
   * Loads the KEGG terms of a feature, grouped by analysis.
   *
   * @param $feature
   *   A feature object; its feature_id is used for the lookups.
   *
   * @return
   *   An array keyed by analysis_id. Each entry holds
   *   - 'analysis': the analysis variable generated by
   *     tripal_core_generate_chado_var().
   *   - 'terms': the values of the 'kegg_brite_data' properties stored for
   *     the feature in that analysis.
   *   The array is empty when the feature has no KEGG properties.
   */
  function tripal_analysis_kegg_load_feature_terms($feature) {
    $fid = $feature->feature_id;

    // selector for the 'kegg_brite_data' term of the 'tripal' vocabulary
    $brite_type = array(
      'name' => 'kegg_brite_data',
      'cv_id' => array(
        'name' => 'tripal',
      ),
    );

    // every KEGG property stored for this feature
    $props = tripal_core_chado_select('analysisfeatureprop', array('*'), array(
      'analysisfeature_id' => array(
        'feature_id' => $fid,
      ),
      'type_id' => $brite_type,
    ));

    // first pass: find out which analyses the properties belong to
    $results = array();
    foreach ($props as $prop) {
      $links = tripal_core_chado_select('analysisfeature', array('analysis_id'),
        array('analysisfeature_id' => $prop->analysisfeature_id));
      $link = $links[0];

      $analysis = tripal_core_generate_chado_var('analysis',
        array('analysis_id' => $link->analysis_id));
      $results[$analysis->analysis_id]['analysis'] = $analysis;
    }

    // second pass: collect the terms of each analysis
    foreach (array_keys($results) as $analysis_id) {
      $terms = tripal_core_chado_select('analysisfeatureprop', array('*'), array(
        'analysisfeature_id' => array(
          'analysis_id' => $analysis_id,
          'feature_id' => $fid,
        ),
        'type_id' => $brite_type,
      ));
      foreach ($terms as $term) {
        $results[$analysis_id]['terms'][] = $term->value;
      }
    }
    return $results;
  }
  /************************************************************************
   * Form builder for the select box that lists the KEGG analyses of an
   * organism.
   *
   * @param $form_state
   *   The form state; not used here.
   * @param $node
   *   An organism node; $node->organism->organism_id is used for the lookup.
   *
   * @return
   *   A form array with two hidden values for the javascript ('theme_dir' and
   *   'base_url') and the select element 'tripal_analysis_kegg_select', whose
   *   onchange handler calls the javascript function
   *   tripal_analysis_kegg_org_report().
   */
  function tripal_analysis_kegg_select_form(&$form_state = NULL,$node){
    global $base_url;
    $theme_dir = drupal_get_path('theme', 'tripal');

    // find analyses that have KEGG terms
    $query = "
      SELECT *
      FROM {kegg_by_organism} KBO
      WHERE organism_id = %d
      ORDER BY analysis_id DESC
    ";
    $original_db = tripal_db_set_active('chado');
    $rows = db_query($query, $node->organism->organism_id);
    tripal_db_set_active($original_db);

    // the first option is empty so that nothing is preselected
    $choices = array('' => '');
    while ($row = db_fetch_object($rows)) {
      $choices[$row->analysis_id] = (string) $row->analysis_name;
    }

    $form = array();

    // hidden values for the javascript to use
    $form['theme_dir'] = array(
      '#type' => 'hidden',
      '#value' => $theme_dir,
    );
    $form['base_url'] = array(
      '#type' => 'hidden',
      '#value' => $base_url,
    );

    // the select box itself
    $onchange = "tripal_analysis_kegg_org_report(this.options[this.selectedIndex].value,'"
      . $base_url . "','" . $theme_dir . "')";
    $form['tripal_analysis_kegg_select'] = array(
      '#title' => t('Select a KEGG report to view'),
      '#description' => t('Any analysis with KEGG results related to this organism are available for viewing. For further information, see the analysis information page.'),
      '#type' => 'select',
      '#options' => $choices,
      '#attributes' => array(
        'onchange' => $onchange,
      ),
    );
    return $form;
  }
  /*******************************************************************************
   * Parsing KEGG HTML results that are stored in analysisfeatureprop for
   * searching
   *
   * For every 'kegg_brite_data' property of the analysis the HTML tags and
   * newlines are stripped and the remaining text is stored as an
   * 'analysis_kegg_output_keywords' property of the same analysisfeature.
   * When such a keyword property already exists the new keywords are appended
   * to it. Progress messages are printed.
   *
   * @param $analysis_id
   *   The analysis_id of the KEGG analysis to process.
   */
  function tripal_analysis_kegg_extract_keywords ($analysis_id) {
    print "Extracting keywords...\n";

    $output_type_id = tripal_get_cvterm_id('kegg_brite_data');
    $keyword_type_id = tripal_get_cvterm_id('analysis_kegg_output_keywords');

    // Get all stored KEGG results of this analysis. Values are passed as
    // query arguments so that they are escaped by the database layer.
    $sql = "SELECT AFP.analysisfeature_id, AFP.value
            FROM {analysisfeatureprop} AFP
              INNER JOIN {analysisfeature} AF ON AF.analysisfeature_id = AFP.analysisfeature_id
            WHERE type_id = %d
              AND AF.analysis_id = %d";
    $previous_db = tripal_db_set_active('chado');
    $results = db_query($sql, $output_type_id, $analysis_id);
    tripal_db_set_active($previous_db);

    // Define what to be extracted in the array
    $search = array (
      "'<[/!]*?[^<>]*?>'si",  // replace HTML tags with a space
      "'\n'",                 // replace newlines with a space
    );
    $replace = array (
      " ",
      " ",
    );

    $select_sql = "SELECT value FROM {analysisfeatureprop} WHERE analysisfeature_id = %d AND type_id = %d";
    $update_sql = "UPDATE {analysisfeatureprop} SET value = '%s' WHERE analysisfeature_id = %d AND type_id = %d";
    $insert_sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank) VALUES (%d, %d, '%s', 0)";

    while ($record = db_fetch_object($results)) {
      $af_id = $record->analysisfeature_id;

      // Extract new keywords from the KEGG html output
      $text = preg_replace($search, $replace, $record->value);
      // collapse runs of spaces (ereg_replace() no longer exists in PHP 7)
      $new_keywords = trim(preg_replace('/ +/', ' ', $text));

      $previous_db = tripal_db_set_active('chado');
      // Retrieve keywords for this analysisfeature_id if there are any
      $keywords = db_result(db_query($select_sql, $af_id, $keyword_type_id));
      if ($keywords) {
        // Append the new keywords
        db_query($update_sql, "$keywords $new_keywords", $af_id, $keyword_type_id);
      }
      else {
        // Insert the keywords into the analysisfeatureprop table
        db_query($insert_sql, $af_id, $keyword_type_id, $new_keywords);
      }
      tripal_db_set_active($previous_db);
    }
    print "Finished.\n";
  }