tripal_analysis_interpro.module 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953
  1. <?php
  2. // $Id:
  3. // Copyright 2009 Clemson University
  4. /*******************************************************************************
  5. * Tripal Interpro lets users show/hide iprscan results associated with a tripal
  6. * feature
  7. ******************************************************************************/
  /**
   * Registers the module's style sheet and javascript file on the page.
   *
   * By its name this is the module's hook_init() implementation. Both assets
   * are looked up below the path Drupal reports for the 'tripal' theme.
   */
  function tripal_analysis_interpro_init() {
    // Each key is the Drupal function that registers the asset named by its
    // value. The style sheet is registered first, then the script.
    $assets = array(
      'drupal_add_css' => '/css/tripal_analysis_interpro.css',
      'drupal_add_js' => '/js/tripal_analysis_interpro.js',
    );
    foreach ($assets as $register => $relative_path) {
      $register(drupal_get_path('theme', 'tripal') . $relative_path);
    }
  }
  /*******************************************************************************
   * Describes the node type created by this module.
   *
   * @return
   *   An array keyed by node type name ('chado_analysis_interpro') whose value
   *   holds the type's display name, owning module, description and the
   *   title/body settings. Title and body are both disabled and the type is
   *   locked.
   */
  function tripal_analysis_interpro_node_info() {
    return array(
      'chado_analysis_interpro' => array(
        'name' => t('Analysis: Interpro'),
        'module' => 'chado_analysis_interpro',
        'description' => t('An interpro analysis from the chado database'),
        'has_title' => FALSE,
        'title_label' => t('Analysis: Interpro'),
        'has_body' => FALSE,
        'body_label' => t('Interpro Analysis Description'),
        'locked' => TRUE,
      ),
    );
  }
  /*******************************************************************************
   * Provide a Interpro Analysis form
   *
   * Builds the node edit form for an Interpro analysis: a hidden title, the
   * generic analysis fields, the execution date, a description and an
   * "Interpro Settings" fieldset.
   *
   * @param $node
   *   The node being created or edited. Its analysis fields are used as the
   *   default values. If $node->interprofile still holds the combined
   *   "file|parameters" value it is split here and both $node->interprofile
   *   and $node->interproparameters are overwritten on the node.
   *
   * @return
   *   The form array.
   */
  function chado_analysis_interpro_form ($node){
    $form = array();
    $form['title'] = array(
      '#type' => 'hidden',
      '#default_value' => isset($node->title) ? $node->title : NULL,
    );

    // Plain text fields, in display order: field name => (label, required).
    $textfields = array(
      'analysisname' => array(t('Analysis Name'), FALSE),
      'program' => array(t('Program'), TRUE),
      'programversion' => array(t('Program Version'), TRUE),
      'algorithm' => array(t('Algorithm'), FALSE),
      'sourcename' => array(t('Source Name'), FALSE),
      'sourceversion' => array(t('Source Version'), FALSE),
      'sourceuri' => array(t('Source URI'), FALSE),
    );
    $weight = 1;
    foreach ($textfields as $field => $spec) {
      $form[$field] = array(
        '#type' => 'textfield',
        '#title' => $spec[0],
        '#required' => $spec[1],
        '#default_value' => isset($node->$field) ? $node->$field : NULL,
        '#weight' => $weight++,
      );
    }

    // Work out the default date. The value is a "Y-m-d ..." string when it was
    // read from chado, a year/month/day array when the form is being previewed
    // or rebuilt, and empty for a new node.
    $default_time = isset($node->timeexecuted) ? $node->timeexecuted : NULL;
    if (is_array($default_time)
        && isset($default_time['year'], $default_time['month'], $default_time['day'])) {
      $year = $default_time['year'];
      $month = $default_time['month'];
      $day = $default_time['day'];
    }
    elseif ($default_time && !is_array($default_time)) {
      // Leading zeros are stripped from month and day.
      $year = preg_replace("/^(\d+)-\d+-\d+ .*/", "$1", $default_time);
      $month = preg_replace("/^\d+-0?(\d+)-\d+ .*/", "$1", $default_time);
      $day = preg_replace("/^\d+-\d+-0?(\d+) .*/", "$1", $default_time);
    }
    else {
      // If the time is not set, use current time
      $now = time();
      $year = format_date($now, 'custom', 'Y');
      $month = format_date($now, 'custom', 'n');
      $day = format_date($now, 'custom', 'j');
    }
    $form['timeexecuted'] = array(
      '#type' => 'date',
      '#title' => t('Time Executed'),
      '#required' => TRUE,
      '#default_value' => array(
        'year' => $year,
        'month' => $month,
        'day' => $day,
      ),
      '#weight' => 8,
    );

    // The raw description is passed through: the textarea element escapes its
    // value when rendered, so escaping here as well would double-encode
    // characters such as "&" and store the encoded text on save.
    $form['description'] = array(
      '#type' => 'textarea',
      '#rows' => 15,
      '#title' => t('Description and/or Program Settings'),
      '#required' => FALSE,
      '#default_value' => isset($node->description) ? $node->description : NULL,
      '#weight' => 9,
    );

    //----InterProScan Settings (Shown only when Tripal Interpro is enabled) ----
    // The settings are stored as one "file|parameters" value. Split on the
    // first bar only so that bars inside the parameters survive.
    if (isset($node->interprofile) && is_string($node->interprofile)
        && strpos($node->interprofile, '|') !== FALSE) {
      $prop_values = explode('|', $node->interprofile, 2);
      $node->interprofile = $prop_values[0];
      $node->interproparameters = $prop_values[1];
    }
    $form['interpro'] = array(
      '#title' => t('Interpro Settings'),
      '#type' => 'fieldset',
      '#description' => t('Specific Settings for Interpro Analysis.'),
      '#collapsible' => TRUE,
      '#attributes' => array('id' => 'interpro-extra-settings'),
      '#weight' => 11,
    );
    $form['interpro']['interprofile'] = array(
      '#title' => t('Interproscan Output File (in html format)'),
      '#type' => 'textfield',
      '#description' => t('The html output file generated by Interproscan in full path.'),
      '#default_value' => isset($node->interprofile) ? $node->interprofile : NULL,
    );
    $form['interpro']['interprojob'] = array(
      '#type' => 'checkbox',
      '#title' => t('Submit a job to parse the Interpro html output'),
      '#description' => t('Note: features associated with the interpro results must '.
                          'exist in chado before parsing the file. Otherwise, interpro '.
                          'results that cannot be linked to a feature will be '.
                          'discarded. Also, Triapl Interpro module needs to be enabled.'),
      '#default_value' => isset($node->interprojob) ? $node->interprojob : NULL,
      '#attributes' => array(
        'onclick' => 'return isSubmittingJob(this)',
      ),
    );
    $form['interpro']['parsego'] = array(
      '#type' => 'checkbox',
      '#title' => t('Load GO terms to the database'),
      '#description' => t('Check the box to load GO terms to chado database'),
      '#default_value' => isset($node->parsego) ? $node->parsego : NULL,
    );
    $form['interpro']['interproparameters'] = array(
      '#title' => t('Parameters'),
      '#type' => 'textfield',
      '#description' => t('The parameters for running the interpro analysis.'),
      '#default_value' => isset($node->interproparameters) ? $node->interproparameters : NULL,
    );
    return $form;
  }
  /**
   * Stores a newly created Interpro analysis node.
   *
   * If the node does not already point at an existing chado analysis, a row is
   * added to the chado analysis table together with an analysisprop row
   * holding "interprofile|interproparameters", and - when requested on the
   * form - a job is queued to parse the Interproscan output. Afterwards the
   * node is linked to the analysis in {chado_analysis} and given a title.
   *
   * @param $node
   *   The node being inserted, carrying the submitted form values.
   */
  function chado_analysis_interpro_insert($node){
    global $user;

    // Create a timestamp so we can insert it into the chado database
    $time = $node->timeexecuted;
    $timestamp = $time['month'] . '/' . $time['day'] . '/' . $time['year'];

    // If this analysis already exists then don't recreate it in chado
    $analysis = NULL;
    $analysis_id = isset($node->analysis_id) ? $node->analysis_id : NULL;
    if ($analysis_id) {
      $sql = "SELECT analysis_id FROM {analysis} WHERE analysis_id = %d";
      $previous_db = db_set_active('chado');
      $analysis = db_fetch_object(db_query($sql, $analysis_id));
      db_set_active($previous_db);
    }

    // If the analysis doesn't exist then let's create it in chado.
    if (!$analysis) {
      $previous_db = db_set_active('chado'); // use chado database

      // First add the item to the chado analysis table
      $sql = "INSERT INTO {analysis} ".
             "  (name, description, program, programversion, algorithm, ".
             "   sourcename, sourceversion, sourceuri, timeexecuted) ".
             "VALUES ('%s','%s','%s','%s','%s','%s','%s','%s','%s')";
      db_query($sql, $node->analysisname, $node->description,
        $node->program, $node->programversion, $node->algorithm,
        $node->sourcename, $node->sourceversion, $node->sourceuri,
        $timestamp);

      // find the newly entered analysis_id
      $sql = "SELECT analysis_id ".
             "FROM {analysis} ".
             "WHERE program = '%s' ".
             "AND programversion = '%s' ".
             "AND sourcename = '%s'";
      $analysis_id = db_result(db_query($sql, $node->program,
        $node->programversion, $node->sourcename));

      if (!$analysis_id) {
        // The insert did not produce a row we can find again. Stop here
        // rather than linking the node to analysis 0.
        db_set_active($previous_db);
        drupal_set_message(t('The analysis could not be saved to the chado database.'), 'error');
        return;
      }

      // Get cvterm_id for 'analysis_interpro_settings'
      $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
             "INNER JOIN cv CV ON CV.cv_id = CVT.cv_id ".
             "WHERE CVT.name = 'analysis_interpro_settings' ".
             "AND CV.name = 'tripal'";
      $type_id = db_result(db_query($sql));

      // Insert into chado {analysisprop} table
      $settings_saved = FALSE;
      if ($type_id) {
        $sql = "INSERT INTO {analysisprop} (analysis_id, type_id, value) ".
               "VALUES (%d, %d, '%s')";
        $interprosettings = $node->interprofile."|".$node->interproparameters;
        db_query($sql, $analysis_id, $type_id, $interprosettings);
        $settings_saved = TRUE;
      }
      db_set_active($previous_db); // switch back to drupal database

      if (!$settings_saved) {
        drupal_set_message(t("The 'analysis_interpro_settings' term was not found in chado. Interpro settings were not saved."), 'warning');
      }

      // Add a job if the user wants to parse the html output
      if ($node->interprojob) {
        $job_args = array(
          $analysis_id,
          $node->interprofile,
          $node->parsego ? 1 : 0,
        );
        if (is_readable($node->interprofile)) {
          $fname = preg_replace("/.*\/(.*)/", "$1", $node->interprofile);
          tripal_add_job("Parse interpro: $fname", 'tripal_analysis_interpro',
            'tripal_analysis_interpro_parseHTMLFile', $job_args, $user->uid);
        } else {
          drupal_set_message("Can not open interpro output file. Job not scheduled.");
        }
      }
    }

    // Make sure the entry for this analysis doesn't already exist in the
    // chado_analysis table if it doesn't exist then we want to add it.
    $node_check_sql = "SELECT * FROM {chado_analysis} ".
                      "WHERE analysis_id = %d";
    $node_check = db_fetch_object(db_query($node_check_sql, $analysis_id));
    if (!$node_check) {
      // next add the item to the drupal table
      $sql = "INSERT INTO {chado_analysis} (nid, vid, analysis_id) ".
             "VALUES (%d, %d, %d)";
      db_query($sql, $node->nid, $node->vid, $analysis_id);

      // Create a title for the analysis node so when the node is saved, it
      // will have a title: the analysis name if there is one, otherwise
      // "program (programversion)".
      $record = new stdClass();
      if ($node->analysisname) {
        $record->title = $node->analysisname;
      } else {
        $record->title = "$node->program ($node->programversion)";
      }
      $record->nid = $node->nid;
      drupal_write_record('node', $record, 'nid');
      drupal_write_record('node_revisions', $record, 'nid');
    }
  }
  /*******************************************************************************
   * Delete interpro analysis
   *
   * Removes the node's rows from {chado_analysis}, {node} and {node_revisions},
   * then removes the linked analysis and its analysisfeatureprop,
   * analysisfeature and analysisprop rows from the chado database.
   *
   * @param $node
   *   The node being deleted; its nid and vid select the rows to remove.
   */
  function chado_analysis_interpro_delete($node){
    // Look the analysis up first: the mapping row is about to be deleted and
    // the id is still needed for the chado clean-up below.
    $analysis_id = db_result(db_query(
      "SELECT analysis_id FROM {chado_analysis} WHERE nid = %d AND vid = %d",
      $node->nid, $node->vid));

    // Drupal side: one delete per table, keyed on nid and vid.
    $drupal_tables = array('chado_analysis', 'node', 'node_revisions');
    foreach ($drupal_tables as $table) {
      db_query("DELETE FROM {" . $table . "} WHERE nid = %d AND vid = %d",
        $node->nid, $node->vid);
    }

    // Chado side: properties of each analysisfeature go first, then the
    // analysisfeature rows, the analysis properties and the analysis itself.
    $drupal_db = db_set_active('chado');
    $features = db_query(
      "SELECT analysisfeature_id FROM {analysisfeature} WHERE analysis_id=%d",
      $analysis_id);
    while ($row = db_fetch_object($features)) {
      db_query("DELETE FROM {analysisfeatureprop} WHERE analysisfeature_id = %d",
        $row->analysisfeature_id);
    }
    $chado_tables = array('analysisfeature', 'analysisprop', 'analysis');
    foreach ($chado_tables as $table) {
      db_query("DELETE FROM {" . $table . "} WHERE analysis_id = %d", $analysis_id);
    }
    db_set_active($drupal_db);
  }
  /*******************************************************************************
   * Update interpro analysis
   *
   * Writes the submitted analysis fields and the "interprofile|interproparameters"
   * setting back to chado, optionally queues a job to parse the Interproscan
   * output, and refreshes the node title. Nothing is done when the node is
   * being saved as a new revision.
   *
   * @param $node
   *   The node being updated, carrying the submitted form values.
   */
  function chado_analysis_interpro_update($node){
    global $user;

    if ($node->revision) {
      // TODO -- decide what to do about revisions
      return;
    }

    // Create a timestamp so we can insert it into the chado database
    $time = $node->timeexecuted;
    $timestamp = $time['month'] . '/' . $time['day'] . '/' . $time['year'];

    // get the analysis_id for this node. db_result() gives FALSE when the node
    // has no mapping row, instead of dereferencing a missing result object.
    $sql = "SELECT analysis_id ".
           "FROM {chado_analysis} ".
           "WHERE vid = %d";
    $analysis_id = db_result(db_query($sql, $node->vid));

    if ($analysis_id) {
      $previous_db = db_set_active('chado'); // use chado database
      $sql = "UPDATE {analysis} ".
             "SET name = '%s', ".
             "    description = '%s', ".
             "    program = '%s', ".
             "    programversion = '%s', ".
             "    algorithm = '%s', ".
             "    sourcename = '%s', ".
             "    sourceversion = '%s', ".
             "    sourceuri = '%s', ".
             "    timeexecuted = '%s' ".
             "WHERE analysis_id = %d ";
      db_query($sql, $node->analysisname, $node->description, $node->program,
        $node->programversion, $node->algorithm, $node->sourcename,
        $node->sourceversion, $node->sourceuri, $timestamp, $analysis_id);

      // Get cvterm_id for 'analysis_interpro_settings'
      $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
             "INNER JOIN cv CV ON CV.cv_id = CVT.cv_id ".
             "WHERE CVT.name = 'analysis_interpro_settings' ".
             "AND CV.name = 'tripal'";
      $type_id = db_result(db_query($sql));

      $sql = "UPDATE {analysisprop} ".
             "SET value = '%s' ".
             "WHERE analysis_id = %d AND type_id = %d";
      $interprosettings = $node->interprofile."|".$node->interproparameters;
      db_query($sql, $interprosettings, $analysis_id, $type_id);
      db_set_active($previous_db); // switch back to drupal database

      // Add a job if the user wants to parse the html output
      if ($node->interprojob) {
        $job_args = array(
          $analysis_id,
          $node->interprofile,
          $node->parsego ? 1 : 0,
        );
        if (is_readable($node->interprofile)) {
          $fname = preg_replace("/.*\/(.*)/", "$1", $node->interprofile);
          tripal_add_job("Parse interpro: $fname", 'tripal_analysis_interpro',
            'tripal_analysis_interpro_parseHTMLFile', $job_args, $user->uid);
        } else {
          drupal_set_message("Can not open interpro output file. Job not scheduled.");
        }
      }
    } else {
      // Without a linked analysis there is nothing to update in chado and no
      // analysis for a parse job to attach results to.
      drupal_set_message(t('This node is not linked to a chado analysis. Only the node title was updated.'), 'warning');
    }

    // Create a title for the analysis node so when the node is saved, it will
    // have a title: the analysis name if there is one, otherwise
    // "program (programversion)".
    $record = new stdClass();
    if ($node->analysisname) {
      $record->title = $node->analysisname;
    } else {
      $record->title = "$node->program ($node->programversion)";
    }
    $record->nid = $node->nid;
    drupal_write_record('node', $record, 'nid');
    drupal_write_record('node_revisions', $record, 'nid');
  }
  /*******************************************************************************
   * When a node is requested by the user this function is called to allow us
   * to add auxiliary data to the node object.
   *
   * @param $node
   *   The node being loaded; its vid is used to find the linked analysis.
   *
   * @return
   *   An object holding the analysis columns read from chado (analysis_id,
   *   analysisname, description, program, programversion, algorithm,
   *   sourcename, sourceversion, sourceuri, timeexecuted), the interprofile
   *   and interproparameters settings, and a title. When the node is not
   *   linked to an analysis only the title is set.
   */
  function chado_analysis_interpro_load($node){
    // get the analysis_id for this node:
    $sql = "SELECT analysis_id FROM {chado_analysis} WHERE vid = %d";
    $ana_node = db_fetch_object(db_query($sql, $node->vid));

    $additions = new stdClass();
    if ($ana_node) {
      $previous_db = db_set_active('chado'); // use chado database

      // get analysis information
      $sql = "SELECT analysis_id, name AS analysisname, description, program, ".
             "       programversion, algorithm, sourcename, sourceversion, ".
             "       sourceuri, timeexecuted ".
             "FROM {analysis} ".
             "WHERE analysis_id = %d";
      $analysis = db_fetch_object(db_query($sql, $ana_node->analysis_id));
      if ($analysis) {
        // Keep the empty object when the chado row is gone, so the property
        // assignments below never run against FALSE.
        $additions = $analysis;
      }

      // get cvterm_id for 'analysis_interpro_settings'
      $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
             "INNER JOIN cv CV ON CV.cv_id = CVT.cv_id ".
             "WHERE CVT.name = 'analysis_interpro_settings' ".
             "AND CV.name = 'tripal'";
      $type_id = db_result(db_query($sql));

      // get analysisprop information
      $sql = "SELECT value FROM {analysisprop} ".
             "WHERE analysis_id = %d ".
             "AND type_id = %d";
      $analysisprop = db_result(db_query($sql, $ana_node->analysis_id, $type_id));

      // The value is stored as "file|parameters". A limit of 2 splits on the
      // first bar only; a limit of 1 would return the whole string unsplit.
      $prop_values = explode("|", (string) $analysisprop, 2);
      $additions->interprofile = $prop_values[0];
      $additions->interproparameters = isset($prop_values[1]) ? $prop_values[1] : '';

      db_set_active($previous_db); // now use drupal database
    }

    // If the analysis has a name, use it as the node title. If not, construct
    // the title as "program (programversion)".
    if (!empty($additions->analysisname)) {
      $additions->title = $additions->analysisname;
    } else {
      $program = isset($additions->program) ? $additions->program : '';
      $version = isset($additions->programversion) ? $additions->programversion : '';
      $additions->title = "$program ($version)";
    }
    return $additions;
  }
  /*******************************************************************************
   * Prepares a chado_analysis_interpro node for display.
   *
   * Teasers are returned untouched. For a full view the node goes through
   * node_prepare(), an array-valued execution date is flattened to a
   * "year-month-day" string, and a combined "file|parameters" value in
   * interprofile is split into interprofile and interproparameters.
   *
   * @param $node
   *   The node to display.
   * @param $teaser
   *   TRUE when a teaser is being built.
   * @param $page
   *   Not used by this function.
   *
   * @return
   *   The node to render.
   */
  function chado_analysis_interpro_view ($node, $teaser = FALSE, $page = FALSE) {
    if ($teaser) {
      return $node;
    }

    // use drupal's default node view:
    $node = node_prepare($node, $teaser);

    // On preview the date arrives as the form's year/month/day array and
    // would otherwise print as 'Array'.
    $executed = $node->timeexecuted;
    if (is_array($executed)) {
      $node->timeexecuted = implode('-', array(
        $executed['year'],
        $executed['month'],
        $executed['day'],
      ));
    }

    // The interpro settings are kept as bar-separated values in one string.
    if (preg_match("/.*\|.*/", $node->interprofile)) {
      $parts = explode("|", $node->interprofile);
      $node->interprofile = $parts[0];
      $node->interproparameters = $parts[1];
    }

    return $node;
  }
  /*******************************************************************************
   * Parse Interpro HTML Output file into analysisfeatureprop table
   *
   * Every html table in the file that does not contain 'No hits reported' is
   * matched to a chado feature through the sequence name in its links. For a
   * matched feature the cleaned table markup is stored in analysisfeatureprop
   * and, when requested, the GO accessions found in the table are stored in
   * feature_cvterm and analysisfeatureprop. Progress is printed and reported
   * through tripal_job_set_progress(); per-sequence outcomes are appended to a
   * log file below the Drupal files directory.
   *
   * @param $analysis_id
   *   The chado analysis the results belong to.
   * @param $interprofile
   *   Full path of the Interproscan html output file.
   * @param $parsego
   *   Non-zero to also load GO terms.
   * @param $job_id
   *   The job whose progress is updated.
   */
  function tripal_analysis_interpro_parseHTMLFile ($analysis_id, $interprofile, $parsego, $job_id) {
    // Prepare log. Results are appended; parsing continues without a log if
    // the file cannot be opened.
    $filename = preg_replace("/.*\/(.*)/", "$1", $interprofile);
    $logfile = file_directory_path() . "/tripal/tripal_analysis_interpro/load_$filename.log";
    $can_log = file_exists($logfile) ? is_writable($logfile) : is_writable(dirname($logfile));
    $log = $can_log ? fopen($logfile, 'a') : FALSE;
    if (!$log) {
      print "Warning: unable to open log file $logfile. Continuing without a log.\n";
    }

    // Parsing started
    print "Parsing File:".$interprofile." ...\n";
    _tripal_analysis_interpro_parse_log($log, date("D M j G:i:s Y").". Loading $interprofile\n");

    // Load the HTML file and convert it into XML for loading. This happens
    // before the database is switched so a bad file needs no clean-up.
    $interproput = FALSE;
    if (is_readable($interprofile)) {
      $dom = new DOMDocument();
      if ($dom->loadHTMLFile($interprofile)) {
        $interproput = simplexml_load_string($dom->saveXML());
      }
    }
    if (!$interproput) {
      print "Unable to read or parse $interprofile. Nothing was loaded.\n";
      _tripal_analysis_interpro_parse_log($log, "Unable to read or parse $interprofile\n\n");
      if ($log) {
        fclose($log);
      }
      return;
    }

    // Get cvterm_id for 'analysis_interpro_output_hit' which is required
    // for inserting into the analysisfeatureprop table
    $previous_db = db_set_active('chado'); // use chado database
    $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
           "INNER JOIN cv CV ON CV.cv_id = CVT.cv_id ".
           "WHERE CVT.name = 'analysis_interpro_output_hit' ".
           "AND CV.name = 'tripal'";
    $type_id = db_result(db_query($sql));
    print "cvterm_id for analysis_interpro_output_iteration_hits is $type_id\n";
    if (!$type_id) {
      db_set_active($previous_db);
      print "The 'analysis_interpro_output_hit' term was not found in chado. Nothing was loaded.\n";
      _tripal_analysis_interpro_parse_log($log, "Missing cvterm analysis_interpro_output_hit\n\n");
      if ($log) {
        fclose($log);
      }
      return;
    }

    // The GO database id does not change while parsing, so look it up once.
    $go_db_id = db_result(db_query("SELECT db_id FROM {db} WHERE name='GO'"));
    if ($parsego && !$go_db_id) {
      print 'GO schema not installed in chado. GO terms are not processed.';
    }

    // Get html tables for parsing
    $tables = $interproput->children()->children();

    // Count the number of tables to be processed
    $no_iterations = 0;
    foreach ($tables as $tmp) {
      if ($tmp->getName() == 'table') {
        $no_iterations ++;
      }
    }
    print "$no_iterations html tables to be processed.\n";

    // Report progress roughly every 1% of the tables. The interval is at
    // least 1 so that short files do not cause a modulo by zero.
    $interval = max(1, intval($no_iterations * 0.01));
    $idx_iterations = 0;

    // Process the tables
    foreach ($tables as $table) {
      // make sure we are looking at a table and its not an empty table
      if ($table->getName() != 'table' || preg_match('/No hits reported/', $table->asXML())) {
        continue;
      }
      $idx_iterations ++;

      // Set job status. The progress call uses the drupal database.
      if ($idx_iterations % $interval == 0) {
        $percentage = (int) ($idx_iterations / $no_iterations * 100);
        db_set_active($previous_db);
        tripal_job_set_progress($job_id, $percentage);
        $previous_db = db_set_active('chado');
        print $percentage."% ";
      }

      $feature_id = _tripal_analysis_interpro_parse_feature_id($table, $log);
      if (!$feature_id) {
        continue;
      }

      $table_txt = _tripal_analysis_interpro_parse_clean_table($table->asXML());

      //------------------------------------
      // If this feature has already been associated with this analysis, do not
      // reinsert. Otherwise, insert into analysisfeature table. The id is
      // looked up fresh for every table so it never carries over from the
      // previous one.
      //------------------------------------
      $af_sql = "SELECT analysisfeature_id AS id FROM {analysisfeature} ".
                "WHERE feature_id = %d AND analysis_id = %d";
      $analysisfeature_id = db_result(db_query($af_sql, $feature_id, $analysis_id));
      if (!$analysisfeature_id) {
        print "inserting analysisfeature\n";
        $sql = "INSERT INTO {analysisfeature} (feature_id, analysis_id) ".
               "VALUES (%d, %d)";
        db_query($sql, $feature_id, $analysis_id);
        $analysisfeature_id = db_result(db_query($af_sql, $feature_id, $analysis_id));
      }
      if (!$analysisfeature_id) {
        _tripal_analysis_interpro_parse_log($log, "Failed: could not link feature $feature_id to analysis $analysis_id\n");
        continue;
      }
      print "analysisfeature_id is $analysisfeature_id (analysis_id = $analysis_id; feature_id = $feature_id)\n";

      // Get the highest rank for this feature in analysisfeatureprop. New
      // content is stored with 'highest rank + 1'; with no earlier rows the
      // rank is 1.
      $sql = "SELECT MAX(rank) FROM {analysisfeatureprop} AFP ".
             "INNER JOIN analysisfeature AF ON AF.analysisfeature_id = AFP.analysisfeature_id ".
             "WHERE feature_id=%d ".
             "AND analysis_id=%d ".
             "AND type_id=%d ";
      $hi_rank = intval(db_result(db_query($sql, $feature_id, $analysis_id, $type_id))) + 1;

      //------------------------------------------------------------
      // Insert interpro html tags into analysisfeatureprop table
      //------------------------------------------------------------
      // Before inserting, make sure it's not a duplicate
      $sql = "SELECT value FROM {analysisfeatureprop} WHERE analysisfeature_id = %d AND type_id = %d";
      $result = db_query($sql, $analysisfeature_id, $type_id);
      $duplicate = FALSE;
      while ($afp_value = db_fetch_object($result)) {
        if ($table_txt == $afp_value->value) {
          $duplicate = TRUE;
          break;
        }
      }
      if (!$duplicate) {
        $sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank)".
               "VALUES (%d, %d, '%s', %d)";
        db_query($sql, $analysisfeature_id, $type_id, $table_txt, $hi_rank);
        _tripal_analysis_interpro_parse_log($log, " (Insert)\n");
        print "\twriting table\n";
      } else {
        _tripal_analysis_interpro_parse_log($log, " (Skipped)\n");
        print "\tskipping table - dup\n";
      }

      // Parse GO terms. Make sure GO database schema is installed in chado
      if ($go_db_id && $parsego) {
        _tripal_analysis_interpro_parse_go_terms($table, $feature_id, $analysisfeature_id, $go_db_id, $log);
      }
    }

    db_set_active ($previous_db); // Use drupal database
    print "Done.\nSuccessful and failed entries have been saved in the log file:\n $logfile\n";
    _tripal_analysis_interpro_parse_log($log, "\n");
    if ($log) {
      fclose($log);
    }
    return;
  }

  /**
   * Appends a message to the parse log if the log file could be opened.
   */
  function _tripal_analysis_interpro_parse_log($log, $message) {
    if ($log) {
      fwrite($log, $message);
    }
  }

  /**
   * Finds the chado feature an Interpro result table belongs to.
   *
   * Each link in the table's first cell is reduced to a sequence name and
   * looked up as a feature uniquename. Names matching no feature or more than
   * one feature are logged and ignored.
   *
   * @return
   *   The feature_id of the last link that matched exactly one feature, or 0.
   */
  function _tripal_analysis_interpro_parse_feature_id($table, $log) {
    $feature_id = 0;
    foreach ($table->children()->children()->children() as $cell_child) {
      foreach ($cell_child->children() as $link) {
        if ($link->getName() != 'a') {
          continue;
        }
        // Drop the "_<digit>_..." suffix from the sequence name.
        $seqname = preg_replace('/^(.+?)_\d_.+/', "$1", (string) $link);
        print "seqname is $seqname\n";

        // Find out how many features match this uniquename
        $sql = "SELECT count(feature_id) FROM {feature} ".
               "WHERE uniquename = '%s' ";
        $no_features = db_result(db_query($sql, $seqname));

        if ($no_features == 1) {
          $sql = "SELECT feature_id FROM {feature} ".
                 "WHERE uniquename = '%s' ";
          $feature_id = db_result(db_query($sql, $seqname));
          print "\tfeature id is $feature_id\n";
        } else if ($no_features > 1) {
          _tripal_analysis_interpro_parse_log($log, "Ambiguous: ".$seqname." matches more than one feature and is not processed.\n");
        } else {
          _tripal_analysis_interpro_parse_log($log, "Failed: ".$seqname."\n");
        }
      }
    }
    return $feature_id;
  }

  /**
   * Removes rows that carry no information and the ORF link from table markup.
   */
  function _tripal_analysis_interpro_parse_clean_table($table_txt) {
    // Rows whose value is 'no ... parent', 'no ... children', 'no ... entries'
    // or 'none'.
    $unwanted_rows = array(
      "/<tr><td valign=\"top\"><b>Parent<\/b><\/td>\s*<td valign=\"top\">\s*no.*?parent<\/td>\s*<\/tr>/",
      "/<tr><td valign=\"top\"><b>Children<\/b><\/td>\s*<td valign=\"top\">\s*no.*?children<\/td>\s*<\/tr>/",
      "/<tr><td valign=\"top\"><b>Found.*?in<\/b><\/td>\s*<td valign=\"top\">\s*no.*?entries<\/td>\s*<\/tr>/",
      "/<tr><td valign=\"top\"><b>Contains<\/b><\/td>\s*<td valign=\"top\">\s*no.*?entries<\/td>\s*<\/tr>/",
      "/<tr><td valign=\"top\"><b>GO.*?terms<\/b><\/td>\s*<td valign=\"top\">\s*none<\/td>\s*<\/tr>/",
    );
    $table_txt = preg_replace($unwanted_rows, "", $table_txt);

    // Replace the bold /iprscan/wget link with its text.
    $orf_link = "/<b><a href=\"\/iprscan\/wget.*?\">(.*?)<\/a><\/b>/";
    return preg_replace($orf_link, '$1', $table_txt);
  }

  /**
   * Stores the GO accessions found in a result table for one feature.
   *
   * Accessions without a matching cvterm are logged and skipped, and rows that
   * already exist are not inserted again.
   */
  function _tripal_analysis_interpro_parse_go_terms($table, $feature_id, $analysisfeature_id, $go_db_id, $log) {
    foreach ($table->children() as $tr) {
      foreach ($tr->children() as $td) {
        foreach ($td->children() as $gotag) {
          // Look for 'GO:accession#'
          if (!preg_match("/^.*?GO:(\d+).*$/", (string) $gotag, $matches)) {
            continue;
          }
          // Find cvterm_id for the matched GO term
          $sql = "SELECT cvterm_id FROM {cvterm} CVT ".
                 "INNER JOIN dbxref DBX ON CVT.dbxref_id = DBX.dbxref_id ".
                 "WHERE DBX.accession = '%s' AND DBX.db_id = %d";
          $goterm_id = db_result(db_query($sql, $matches[1], $go_db_id));
          if (!$goterm_id) {
            _tripal_analysis_interpro_parse_log($log, "GO term not found: GO:".$matches[1]."\n");
            continue;
          }

          // Insert GO terms into feature_cvterm table.
          // Default pub_id = 1 (NULL) was used
          $sql = "SELECT count(*) FROM {feature_cvterm} ".
                 "WHERE feature_id = %d AND cvterm_id = %d AND pub_id = 1";
          if (!db_result(db_query($sql, $feature_id, $goterm_id))) {
            $sql = "INSERT INTO {feature_cvterm} (feature_id, cvterm_id, pub_id) ".
                   "VALUES (%d, %d, 1)";
            db_query($sql, $feature_id, $goterm_id);
          }

          // Insert GO terms into analysisfeatureprop table
          $sql = "SELECT count(*) FROM {analysisfeatureprop} ".
                 "WHERE analysisfeature_id = %d AND type_id = %d AND rank = 0";
          if (!db_result(db_query($sql, $analysisfeature_id, $goterm_id))) {
            $sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank) ".
                   "VALUES (%d, %d, '%s', 0)";
            db_query($sql, $analysisfeature_id, $goterm_id, $matches[1]);
          }
        }
      }
    }
  }
  /*******************************************************************************
   * tripal_analysis_interpro_nodeapi()
   * HOOK: Implementation of hook_nodeapi()
   *
   * On the 'view' operation, attaches InterPro output to the node's content
   * array. This happens only when the node type is listed in the
   * 'tripal_analysis_interpro_setting' variable (default: chado_feature), the
   * node is not rendered as a teaser, and the node carries a feature id.
   *
   * @param $node
   *   The node being acted on; $node->content is modified in place.
   * @param $op
   *   The nodeapi operation. Every operation other than 'view' is ignored.
   * @param $teaser
   *   Truthy when the teaser is being rendered.
   * @param $page
   *   Not used by this implementation.
   */
  function tripal_analysis_interpro_nodeapi(&$node, $op, $teaser, $page) {
    // Nothing to do for any operation other than 'view'
    if ($op != 'view') {
      return;
    }

    // Node types for which the site administrator enabled the interpro display
    $enabled_types = variable_get('tripal_analysis_interpro_setting',
      array('chado_feature'));
    if (!in_array($node->type, $enabled_types, TRUE)) {
      return;
    }

    // Teasers and nodes without a feature id get no interpro content
    if ($teaser || !$node->feature->feature_id) {
      return;
    }

    // The search indexer gets its own rendering, regular page views get
    // the results box. Both are placed at the same weight.
    if ($node->build_mode == NODE_BUILD_SEARCH_INDEX) {
      $content_key = 'tripal_analysis_interpro_index_version';
      $theme_hook = 'parse_EBI_Interpro_XML_index_version';
    }
    else {
      $content_key = 'tripal_analysis_interpro_form';
      $theme_hook = 'tripal_analysis_interpro_results';
    }
    $node->content[$content_key] = array(
      '#value' => theme($theme_hook, $node),
      '#weight' => 7,
    );
  }
  /************************************************************************
   * Implementation of hook_theme().
   *
   * Registers the two theme hooks this module renders through, so that site
   * themes can override the look and feel of the generated output. Both
   * hooks are declared with a single 'node' entry in their argument list.
   *
   * @return
   *   An array of theme hook definitions keyed by hook name.
   */
  function tripal_analysis_interpro_theme () {
    // Both hooks share the same argument declaration
    $node_argument = array('arguments' => array('node'));

    $hooks = array();
    $hooks['parse_EBI_Interpro_XML_index_version'] = $node_argument;
    $hooks['tripal_analysis_interpro_results'] = $node_argument;
    return $hooks;
  }
  /*******************************************************************************
   * Theme function for the 'tripal_analysis_interpro_results' hook.
   *
   * @param $node
   *   A node whose ->feature->feature_id identifies the feature to show.
   *
   * @return
   *   Whatever tripal_get_interpro_results() returns for that feature id.
   */
  function theme_tripal_analysis_interpro_results ($node) {
    return tripal_get_interpro_results($node->feature->feature_id);
  }
  /*******************************************************************************
   * Theme function for the 'parse_EBI_Interpro_XML_index_version' hook, which
   * hook_nodeapi() uses when the node is built for the search index.
   *
   * @param $node
   *   A node whose ->feature->feature_id identifies the feature to index.
   *
   * @return
   *   Whatever tripal_get_interpro_results_index_version() returns for that
   *   feature id.
   */
  function theme_parse_EBI_Interpro_XML_index_version ($node) {
    return tripal_get_interpro_results_index_version($node->feature->feature_id);
  }
  /*******************************************************************************
   * tripal_get_interpro_results()
   * Build the expandable HTML box showing the stored InterProScan output of a
   * feature, grouped by the analysis that produced it.
   *
   * All chado reads are done first, then the active database is switched back
   * to Drupal once, and only then are the analysis node ids looked up and the
   * markup assembled.
   *
   * @param $feature_id
   *   The chado feature_id whose InterProScan results should be shown.
   *
   * @return
   *   The HTML for the results box, or an empty string when the feature has no
   *   stored InterProScan output.
   */
  function tripal_get_interpro_results($feature_id){
    $content = '';

    $previous_db = db_set_active('chado');

    // cvterm under which the InterProScan output tables are stored
    $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT
            INNER JOIN cv ON cv.cv_id = CVT.cv_id
            WHERE CVT.name = 'analysis_interpro_output_hit'
            AND CV.name = 'tripal'";
    $type_id = db_result(db_query($sql));

    // Analyses that stored InterProScan output for this feature. The query is
    // run once and its rows are collected, instead of being executed a second
    // time just to find out whether it returns anything.
    $sql = "SELECT A.analysis_id AS aid
            FROM {analysis} A
            INNER JOIN analysisfeature AF ON A.analysis_id = AF.analysis_id
            INNER JOIN analysisfeatureprop AFP ON AF.analysisfeature_id = AFP.analysisfeature_id
            WHERE feature_id = %d
            AND AFP.type_id = %d
            GROUP BY A.analysis_id
            ";
    $result = db_query($sql, $feature_id, $type_id);

    $analyses = array();
    while ($ana = db_fetch_object($result)) {
      // Name and execution date of the analysis
      $sql = "SELECT name, to_char(timeexecuted, 'MM-DD-YYYY') AS time
              FROM {analysis}
              WHERE analysis_id = %d";
      $details = db_fetch_object(db_query($sql, $ana->aid));
      if (!$details) {
        continue;
      }

      // Stored output of this analysis for the feature. The parser in this
      // module also writes GO accessions into analysisfeatureprop under the
      // GO term's own type id, so the rows are restricted to the interpro
      // output type to keep those bare accession strings out of the markup.
      $sql = "SELECT AFP.value AS afpvalue
              FROM {analysisfeatureprop} AFP
              INNER JOIN analysisfeature AF on AF.analysisfeature_id = AFP.analysisfeature_id
              WHERE AF.analysis_id = %d
              AND AF.feature_id = %d
              AND AFP.type_id = %d
              ";
      $hits = db_query($sql, $ana->aid, $feature_id, $type_id);
      $output = '';
      while ($afp = db_fetch_object($hits)) {
        $output .= $afp->afpvalue;
      }

      $analyses[] = array(
        'aid' => $ana->aid,
        'name' => $details->name,
        'time' => $details->time,
        'output' => $output,
      );
    }

    // Everything below reads from the Drupal database only
    db_set_active($previous_db);

    if (!$analyses) {
      return $content;
    }

    $content .= "<div id=\"interpro-hits\" class=\"tripal_interpro-info-box\">
      <div class=\"tripal_expandableBox\">
      <h3>InterProScan Analysis</h3>
      </div>
      <div class=\"tripal_expandableBoxContent\">
      <span>
      <table class=\"tripal_interpro_results_table\">
      <tr><td>";
    foreach ($analyses as $analysis) {
      // Find node id for the analysis
      $ana_nid = db_result(db_query("SELECT nid FROM {chado_analysis} WHERE analysis_id = %d", $analysis['aid']));
      $ana_url = url("node/".$ana_nid);

      // Analysis date and a link to the analysis node. The href is quoted and
      // the analysis name is escaped because it is free text from the database.
      $content .= '<strong>Analysis Date:</strong> ' . $analysis['time'] . "\n"
        . '(<a href="' . $ana_url . '">' . check_plain($analysis['name']) . '</a>)';

      // The stored value is the pre-rendered result markup; emit it as is
      $content .= $analysis['output'];
    }
    $content .= '</td></tr></table></span></div></div>';

    return $content;
  }
  /*******************************************************************************
   * tripal_get_interpro_results_index_version()
   * Collect the stored InterProScan output of a feature as one string, for use
   * when the node is built for the search index.
   *
   * @param $feature_id
   *   The chado feature_id whose InterProScan output should be collected.
   *
   * @return
   *   The concatenated analysisfeatureprop values of type
   *   'analysis_interpro_output_hit' for the feature, or an empty string when
   *   there are none.
   */
  function tripal_get_interpro_results_index_version($feature_id){
    $content = '';

    $previous_db = db_set_active('chado');

    // cvterm under which the InterProScan output is stored
    $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
           "INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
           "WHERE CVT.name = 'analysis_interpro_output_hit' ".
           "AND CV.name = 'tripal'";
    $type_id = db_result(db_query($sql));

    // Get the stored output from the analysisfeatureprop value column.
    // NOTE(review): analysisprop is joined but none of its columns are used;
    // the join presumably repeats each value once per analysis property and
    // drops analyses without properties. Kept as is - confirm whether intended.
    $sql = "SELECT AFP.value AS afpvalue FROM {analysisfeatureprop} AFP ".
           "INNER JOIN analysisfeature AF ON AF.analysisfeature_id = AFP.analysisfeature_id ".
           "INNER JOIN analysisprop AP ON AP.analysis_id = AF.analysis_id ".
           "WHERE feature_id = %d ".
           "AND AFP.type_id = %d ";
    $result = db_query($sql, $feature_id, $type_id);

    // Iterate the result set directly. Probing it with db_result() first
    // would fetch (and thereby discard) the first row, so a feature with a
    // single stored value would contribute nothing to the index.
    while ($analysisfeatureprop = db_fetch_object($result)) {
      $content .= $analysisfeatureprop->afpvalue;
    }

    db_set_active($previous_db);
    return $content;
  }
  /*******************************************************************************
   * Tripal Interpro administrative setting form. This function is called by
   * tripal_analysis module which asks for an admin form to show on the page
   *
   * @return
   *   An object with two properties: ->form, the form array holding a
   *   description item and the 'tripal_analysis_interpro_setting' checkboxes,
   *   and ->title, the heading for the settings section.
   */
  function tripal_analysis_interpro_get_settings() {
    // 'chado_feature' is the only content type offered for showing interpro results
    $allowedoptions = array(
      'chado_feature' => "Show Interpro results on feature pages",
    );

    $form = array();
    $form['description'] = array(
      '#type' => 'item',
      '#value' => t("Some chado features were analyzed using InterProScan. This option allows user to display the interpro analysis results. Please read user manual for storage and display of interpro files. Check the box to enable the analysis results. Uncheck to disable it."),
      '#weight' => 0,
    );
    $form['tripal_analysis_interpro_setting'] = array(
      '#type' => 'checkboxes',
      '#options' => $allowedoptions,
      '#default_value' => variable_get('tripal_analysis_interpro_setting',
        array('chado_feature')),
    );

    // Create the container explicitly: assigning a property on an undefined
    // variable raises a warning on older PHP versions and is a fatal error
    // on PHP 8.
    $settings = new stdClass();
    $settings->form = $form;
    $settings->title = "Tripal Interpro";
    return $settings;
  }
  /*******************************************************************************
   * Implementation of hook_perm().
   *
   * Declares the permissions that protect viewing, creating, deleting and
   * editing of chado_analysis_interpro content.
   *
   * @return
   *   A list of permission names.
   */
  function tripal_analysis_interpro_perm(){
    $permissions = array();
    $permissions[] = 'access chado_analysis_interpro content';
    $permissions[] = 'create chado_analysis_interpro content';
    $permissions[] = 'delete chado_analysis_interpro content';
    $permissions[] = 'edit chado_analysis_interpro content';
    return $permissions;
  }
  /*******************************************************************************
   * Access control for users trying to perform actions on the
   * chado_analysis_interpro content managed by this module.
   *
   * @param $op
   *   The operation: 'create', 'update', 'delete' or 'view'.
   * @param $node
   *   The node the operation applies to. Not inspected here.
   * @param $account
   *   The user account whose permissions are checked.
   *
   * @return
   *   For 'create', the result of user_access() itself. For 'update', 'delete'
   *   and 'view', TRUE when the account holds the matching permission and
   *   FALSE otherwise. FALSE for any other operation.
   */
  function chado_analysis_interpro_access($op, $node, $account){
    switch ($op) {
      case 'create':
        // Passed straight through, without normalising to TRUE/FALSE
        return user_access('create chado_analysis_interpro content', $account);

      case 'update':
        $required = 'edit chado_analysis_interpro content';
        break;

      case 'delete':
        $required = 'delete chado_analysis_interpro content';
        break;

      case 'view':
        $required = 'access chado_analysis_interpro content';
        break;

      default:
        return FALSE;
    }

    return user_access($required, $account) ? TRUE : FALSE;
  }