tripal_analysis_interpro.module 40 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984
  1. <?php
  2. /*******************************************************************************
  3. * Tripal Interpro lets users show/hide iprscan results associated with a tripal
  4. * feature
  5. ******************************************************************************/
  6. function tripal_analysis_interpro_init(){
  7. // Add javascript and style sheet
  8. drupal_add_css(drupal_get_path('theme', 'tripal').
  9. '/css/tripal_analysis_interpro.css');
  10. // Add javascript and style sheet
  11. drupal_add_js(drupal_get_path('theme', 'tripal').
  12. '/js/tripal_analysis_interpro.js');
  13. }
  14. /*******************************************************************************
  15. * Provide information to drupal about the node types that we're creating
  16. * in this module
  17. */
  18. function tripal_analysis_interpro_node_info() {
  19. $nodes = array();
  20. $nodes['chado_analysis_interpro'] = array(
  21. 'name' => t('Analysis: Interpro'),
  22. 'module' => 'chado_analysis_interpro',
  23. 'description' => t('An interpro analysis from the chado database'),
  24. 'has_title' => FALSE,
  25. 'title_label' => t('Analysis: Interpro'),
  26. 'has_body' => FALSE,
  27. 'body_label' => t('Interpro Analysis Description'),
  28. 'locked' => TRUE
  29. );
  30. return $nodes;
  31. }
  32. /*******************************************************************************
  33. *
  34. */
  35. function tripal_analysis_interpro_block($op = 'list', $delta = 0, $edit=array()){
  36. switch($op) {
  37. case 'list':
  38. $blocks['tai_results']['info'] = t('Tripal InterProScan Analysis Results');
  39. $blocks['tai_results']['cache'] = BLOCK_NO_CACHE;
  40. return $blocks;
  41. case 'view':
  42. if(user_access('access chado_analysis_interpro content') and arg(0) == 'node' and is_numeric(arg(1))) {
  43. $nid = arg(1);
  44. $node = node_load($nid);
  45. $block = array();
  46. switch($delta){
  47. case 'tai_results':
  48. $block['subject'] = t('InterProScan Results');
  49. $block['content'] = theme('tripal_analysis_interpro_results', $node);
  50. break;
  51. default :
  52. }
  53. return $block;
  54. }
  55. }
  56. }
  57. /*******************************************************************************
  58. * Provide a Interpro Analysis form
  59. */
  60. function chado_analysis_interpro_form ($node){
  61. $type = node_get_types('type', $node);
  62. $form = array();
  63. $form['title']= array(
  64. '#type' => 'hidden',
  65. '#default_value' => $node->title,
  66. );
  67. $form['analysisname']= array(
  68. '#type' => 'textfield',
  69. '#title' => t('Analysis Name'),
  70. '#required' => FALSE,
  71. '#default_value' => $node->analysisname,
  72. '#weight' => 1
  73. );
  74. $form['program']= array(
  75. '#type' => 'textfield',
  76. '#title' => t('Program'),
  77. '#required' => TRUE,
  78. '#default_value' => $node->program,
  79. '#weight' => 2
  80. );
  81. $form['programversion']= array(
  82. '#type' => 'textfield',
  83. '#title' => t('Program Version'),
  84. '#required' => TRUE,
  85. '#default_value' => $node->programversion,
  86. '#weight' => 3
  87. );
  88. $form['algorithm']= array(
  89. '#type' => 'textfield',
  90. '#title' => t('Algorithm'),
  91. '#required' => FALSE,
  92. '#default_value' => $node->algorithm,
  93. '#weight' => 4
  94. );
  95. $form['sourcename']= array(
  96. '#type' => 'textfield',
  97. '#title' => t('Source Name'),
  98. '#required' => FALSE,
  99. '#default_value' => $node->sourcename,
  100. '#weight' => 5
  101. );
  102. $form['sourceversion']= array(
  103. '#type' => 'textfield',
  104. '#title' => t('Source Version'),
  105. '#required' => FALSE,
  106. '#default_value' => $node->sourceversion,
  107. '#weight' => 6
  108. );
  109. $form['sourceuri']= array(
  110. '#type' => 'textfield',
  111. '#title' => t('Source URI'),
  112. '#required' => FALSE,
  113. '#default_value' => $node->sourceuri,
  114. '#weight' => 7
  115. );
  116. // Get time saved in chado
  117. $default_time = $node->timeexecuted;
  118. $year = preg_replace("/^(\d+)-\d+-\d+ .*/", "$1", $default_time);
  119. $month = preg_replace("/^\d+-0?(\d+)-\d+ .*/", "$1", $default_time);
  120. $day = preg_replace("/^\d+-\d+-0?(\d+) .*/", "$1", $default_time);
  121. // If the time is not set, use current time
  122. if (!$default_time) {
  123. $default_time = time();
  124. $year = format_date($default_time, 'custom', 'Y');
  125. $month = format_date($default_time, 'custom', 'n');
  126. $day = format_date($default_time, 'custom', 'j');
  127. }
  128. $form['timeexecuted']= array(
  129. '#type' => 'date',
  130. '#title' => t('Time Executed'),
  131. '#required' => TRUE,
  132. '#default_value' => array(
  133. 'year' => $year,
  134. 'month' => $month,
  135. 'day' => $day,
  136. ),
  137. '#weight' => 8
  138. );
  139. $form['description']= array(
  140. '#type' => 'textarea',
  141. '#rows' => 15,
  142. '#title' => t('Description and/or Program Settings'),
  143. '#required' => FALSE,
  144. '#default_value' => check_plain($node->description),
  145. '#weight' => 9
  146. );
  147. //----InterProScan Settings (Shown only when Tripal Interpro is enabled) ----
  148. if (preg_match("/.*\|.*/",$node->interprofile)) {
  149. $prop_values = explode("|", $node->interprofile);
  150. $node->interprofile = $prop_values[0];
  151. $node->interproparameters = $prop_values[1];
  152. }
  153. $moreSettings ['interpro'] = 'Interpro Settings';
  154. $form['interpro'] = array(
  155. '#title' => t('Interpro Settings'),
  156. '#type' => 'fieldset',
  157. '#description' => t('Specific Settings for Interpro Analysis.'),
  158. '#collapsible' => TRUE,
  159. '#attributes' => array('id' => 'interpro-extra-settings'),
  160. '#weight' => 11
  161. );
  162. $form['interpro']['interprofile'] = array(
  163. '#title' => t('Interproscan Output File (in html format)'),
  164. '#type' => 'textfield',
  165. '#description' => t('The html output file generated by Interproscan in full path.'),
  166. '#default_value' => $node->interprofile,
  167. );
  168. $form['interpro']['interprojob'] = array(
  169. '#type' => 'checkbox',
  170. '#title' => t('Submit a job to parse the Interpro html output'),
  171. '#description' => t('Note: features associated with the interpro results must '.
  172. 'exist in chado before parsing the file. Otherwise, interpro '.
  173. 'results that cannot be linked to a feature will be '.
  174. 'discarded. Also, Triapl Interpro module needs to be enabled.'),
  175. '#default_value' => $node->interprojob,
  176. '#attributes' => array(
  177. 'onclick' => 'return isSubmittingJob(this)'
  178. )
  179. );
  180. $form['interpro']['parsego'] = array(
  181. '#type' => 'checkbox',
  182. '#title' => t('Load GO terms to the database'),
  183. '#description' => t('Check the box to load GO terms to chado database'),
  184. '#default_value' => $node->parsego
  185. );
  186. $form['interpro']['interproparameters'] = array(
  187. '#title' => t('Parameters'),
  188. '#type' => 'textfield',
  189. '#description' => t('The parameters for running the interpro analysis.'),
  190. '#default_value' => $node->interproparameters,
  191. );
  192. return $form;
  193. }
  194. /*******************************************************************************
  195. *
  196. */
  197. function chado_analysis_interpro_insert($node){
  198. global $user;
  199. // Create a timestamp so we can insert it into the chado database
  200. $time = $node->timeexecuted;
  201. $month = $time['month'];
  202. $day = $time['day'];
  203. $year = $time['year'];
  204. $timestamp = $month.'/'.$day.'/'.$year;
  205. // If this analysis already exists then don't recreate it in chado
  206. $analysis_id = $node->analysis_id;
  207. if ($analysis_id) {
  208. $sql = "SELECT analysis_id ".
  209. "FROM {Analysis} ".
  210. "WHERE analysis_id = %d ";
  211. $previous_db = tripal_db_set_active('chado');
  212. $analysis = db_fetch_object(db_query($sql, $node->analysis_id));
  213. tripal_db_set_active($previous_db);
  214. }
  215. // If the analysis doesn't exist then let's create it in chado.
  216. if(!$analysis){
  217. // First add the item to the chado analysis table
  218. $sql = "INSERT INTO {analysis} ".
  219. " (name, description, program, programversion, algorithm, ".
  220. " sourcename, sourceversion, sourceuri, timeexecuted) ".
  221. "VALUES ('%s','%s','%s','%s','%s','%s','%s','%s','%s')";
  222. $previous_db = tripal_db_set_active('chado'); // use chado database
  223. db_query($sql,$node->analysisname, $node->description,
  224. $node->program,$node->programversion,$node->algorithm,
  225. $node->sourcename, $node->sourceversion, $node->sourceuri,
  226. $timestamp);
  227. // find the newly entered analysis_id
  228. $sql = "SELECT analysis_id ".
  229. "FROM {Analysis} ".
  230. "WHERE program='%s'".
  231. "AND programversion='%s'".
  232. "AND sourcename='%s'";
  233. $analysis_id = db_result(db_query($sql, $node->program,
  234. $node->programversion, $node->sourcename));
  235. // Get cvterm_id for 'analysis_interpro_settings'
  236. $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
  237. "INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
  238. "WHERE CVT.name = 'analysis_interpro_settings' ".
  239. "AND CV.name = 'tripal'";
  240. $type_id = db_result(db_query($sql));
  241. // Insert into chado {analysisprop} table
  242. $sql = "INSERT INTO {analysisprop} (analysis_id, type_id, value) ".
  243. "VALUES (%d, %d, '%s')";
  244. $interprosettings = $node->interprofile."|".$node->interproparameters;
  245. db_query($sql, $analysis_id, $type_id, $interprosettings);
  246. tripal_db_set_active($previous_db); // switch back to drupal database
  247. // Add a job if the user wants to parse the html output
  248. if($node->interprojob) {
  249. $job_args[0] = $analysis_id;
  250. $job_args[1] = $node->interprofile;
  251. if ($node->parsego) {
  252. $job_args[2] = 1;
  253. } else {
  254. $job_args[2] = 0;
  255. }
  256. if (is_readable($node->interprofile)) {
  257. $fname = preg_replace("/.*\/(.*)/", "$1", $node->interprofile);
  258. tripal_add_job("Parse interpro: $fname",'tripal_analysis_interpro',
  259. 'tripal_analysis_interpro_parseHTMLFile', $job_args, $user->uid);
  260. } else {
  261. drupal_set_message("Can not open interpro output file. Job not scheduled.");
  262. }
  263. }
  264. }
  265. // Make sure the entry for this analysis doesn't already exist in the
  266. // chado_analysis table if it doesn't exist then we want to add it.
  267. $node_check_sql = "SELECT * FROM {chado_analysis} ".
  268. "WHERE analysis_id = %d";
  269. $node_check = db_fetch_object(db_query($node_check_sql, $analysis_id));
  270. if(!$node_check){
  271. // next add the item to the drupal table
  272. $sql = "INSERT INTO {chado_analysis} (nid, vid, analysis_id) ".
  273. "VALUES (%d, %d, %d)";
  274. db_query($sql,$node->nid,$node->vid,$analysis_id);
  275. // Create a title for the analysis node using the unique keys so when the
  276. // node is saved, it will have a title
  277. $record = new stdClass();
  278. // If the analysis has a name, use it as the node title. If not, construct
  279. // the title using program, programversion, and sourcename
  280. if ($node->analysisname) {
  281. $record->title = $node->analysisname;
  282. } else {
  283. //Construct node title as "program (version)
  284. $record->title = "$node->program ($node->programversion)";
  285. }
  286. $record->nid = $node->nid;
  287. drupal_write_record('node',$record,'nid');
  288. drupal_write_record('node_revisions',$record,'nid');
  289. }
  290. }
  291. /*******************************************************************************
  292. * Delete interpro anlysis
  293. */
  294. function chado_analysis_interpro_delete($node){
  295. // Before removing, get analysis_id so we can remove it from chado database
  296. // later
  297. $sql_drupal = "SELECT analysis_id ".
  298. "FROM {chado_analysis} ".
  299. "WHERE nid = %d ".
  300. "AND vid = %d";
  301. $analysis_id = db_result(db_query($sql_drupal, $node->nid, $node->vid));
  302. // Remove data from the {chado_analysis}, {node}, and {node_revisions} tables
  303. $sql_del = "DELETE FROM {chado_analysis} ".
  304. "WHERE nid = %d ".
  305. "AND vid = %d";
  306. db_query($sql_del, $node->nid, $node->vid);
  307. $sql_del = "DELETE FROM {node} ".
  308. "WHERE nid = %d ".
  309. "AND vid = %d";
  310. db_query($sql_del, $node->nid, $node->vid);
  311. $sql_del = "DELETE FROM {node_revisions} ".
  312. "WHERE nid = %d ".
  313. "AND vid = %d";
  314. db_query($sql_del, $node->nid, $node->vid);
  315. //Remove from analysisfeatureprop, analysisfeature, analysis, and analysisprop tables
  316. $previous_db = tripal_db_set_active('chado');
  317. $sql = "SELECT analysisfeature_id FROM {analysisfeature} WHERE analysis_id=%d";
  318. $results = db_query($sql, $analysis_id);
  319. while ($af = db_fetch_object($results)) {
  320. db_query("DELETE FROM {analysisfeatureprop} WHERE analysisfeature_id = %d", $af->analysisfeature_id);
  321. }
  322. db_query("DELETE FROM {analysisfeature} WHERE analysis_id = %d", $analysis_id);
  323. db_query("DELETE FROM {analysisprop} WHERE analysis_id = %d", $analysis_id);
  324. db_query("DELETE FROM {analysis} WHERE analysis_id = %d", $analysis_id);
  325. tripal_db_set_active($previous_db);
  326. }
  327. /*******************************************************************************
  328. * Update interpro analysis
  329. */
  330. function chado_analysis_interpro_update($node){
  331. global $user;
  332. if($node->revision){
  333. // TODO -- decide what to do about revisions
  334. } else {
  335. // Create a timestamp so we can insert it into the chado database
  336. $time = $node->timeexecuted;
  337. $month = $time['month'];
  338. $day = $time['day'];
  339. $year = $time['year'];
  340. $timestamp = $month.'/'.$day.'/'.$year;
  341. // get the analysis_id for this node:
  342. $sql = "SELECT analysis_id ".
  343. "FROM {chado_analysis} ".
  344. "WHERE vid = %d";
  345. $analysis_id = db_fetch_object(db_query($sql, $node->vid))->analysis_id;
  346. $sql = "UPDATE {analysis} ".
  347. "SET name = '%s', ".
  348. " description = '%s', ".
  349. " program = '%s', ".
  350. " programversion = '%s', ".
  351. " algorithm = '%s', ".
  352. " sourcename = '%s', ".
  353. " sourceversion = '%s', ".
  354. " sourceuri = '%s', ".
  355. " timeexecuted = '%s' ".
  356. "WHERE analysis_id = %d ";
  357. $previous_db = tripal_db_set_active('chado'); // use chado database
  358. db_query($sql, $node->analysisname, $node->description, $node->program,
  359. $node->programversion,$node->algorithm,$node->sourcename,
  360. $node->sourceversion, $node->sourceuri, $timestamp, $analysis_id);
  361. // Get cvterm_id for 'analysis_interpro_settings'
  362. $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
  363. "INNER JOIN cv CV ON CV.cv_id = CVT.cv_id ".
  364. "WHERE CVT.name = 'analysis_interpro_settings' ".
  365. "AND CV.name = 'tripal'";
  366. $type_id = db_result(db_query($sql));
  367. $sql = "UPDATE {analysisprop} ".
  368. "SET value = '%s' ".
  369. "WHERE analysis_id = %d AND type_id = %d";
  370. $interprosettings = $node->interprofile."|".$node->interproparameters;
  371. db_query($sql, $interprosettings, $analysis_id, $type_id);
  372. tripal_db_set_active($previous_db); // switch back to drupal database
  373. // Add a job if the user wants to parse the html output
  374. if($node->interprojob) {
  375. $job_args[0] = $analysis_id;
  376. $job_args[1] = $node->interprofile;
  377. if ($node->parsego) {
  378. $job_args[2] = 1;
  379. } else {
  380. $job_args[2] = 0;
  381. }
  382. if (is_readable($node->interprofile)) {
  383. $fname = preg_replace("/.*\/(.*)/", "$1", $node->interprofile);
  384. tripal_add_job("Parse interpro: $fname",'tripal_analysis_interpro',
  385. 'tripal_analysis_interpro_parseHTMLFile', $job_args, $user->uid);
  386. } else {
  387. drupal_set_message("Can not open interpro output file. Job not scheduled.");
  388. }
  389. }
  390. // Create a title for the analysis node using the unique keys so when the
  391. // node is saved, it will have a title
  392. $record = new stdClass();
  393. // If the analysis has a name, use it as the node title. If not, construct
  394. // the title using program, programversion, and sourcename
  395. if ($node->analysisname) {
  396. $record->title = $node->analysisname;
  397. } else {
  398. //Construct node title as "program (version)
  399. $record->title = "$node->program ($node->programversion)";
  400. }
  401. $record->nid = $node->nid;
  402. drupal_write_record('node',$record,'nid');
  403. drupal_write_record('node_revisions',$record,'nid');
  404. }
  405. }
  406. /*******************************************************************************
  407. * When a node is requested by the user this function is called to allow us
  408. * to add auxiliary data to the node object.
  409. */
  410. function chado_analysis_interpro_load($node){
  411. // get the analysis_id for this node:
  412. $sql = "SELECT analysis_id FROM {chado_analysis} WHERE vid = %d";
  413. $ana_node = db_fetch_object(db_query($sql, $node->vid));
  414. $additions = new stdClass();
  415. if ($ana_node) {
  416. // get analysis information
  417. $sql = "SELECT Analysis_id, name AS analysisname, description, program, ".
  418. " programversion, algorithm, sourcename, sourceversion, ".
  419. " sourceuri, timeexecuted ".
  420. "FROM {Analysis} ".
  421. "WHERE Analysis_id = $ana_node->analysis_id";
  422. $previous_db = tripal_db_set_active('chado'); // use chado database
  423. $additions = db_fetch_object(db_query($sql));
  424. // get cvterm_id for 'analysis_interpro_settings'
  425. $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
  426. "INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
  427. "WHERE CVT.name = 'analysis_interpro_settings' ".
  428. "AND CV.name = 'tripal'";
  429. $type_id = db_result(db_query($sql));
  430. // get analysisprop information
  431. $sql = "SELECT value FROM {analysisprop} ".
  432. "WHERE analysis_id = %d ".
  433. "AND type_id = %d";
  434. $analysisprop = db_result(db_query($sql, $ana_node->analysis_id, $type_id));
  435. $prop_values = explode ("|", $analysisprop, 1);
  436. $additions->interprofile = $prop_values[0];
  437. $additions->interproparameters = $prop_values[1];
  438. tripal_db_set_active($previous_db); // now use drupal database
  439. }
  440. // If the analysis has a name, use it as the node title. If not, construct
  441. // the title using program programversion, and sourcename
  442. if ($additions->analysisname) {
  443. $additions->title = $additions->analysisname;
  444. } else {
  445. // Construct node title as "program version (source)
  446. $additions->title = "$additions->program ($additions->programversion)";
  447. }
  448. return $additions;
  449. }
  450. /*******************************************************************************
  451. * This function customizes the view of the chado_analysis node. It allows
  452. * us to generate the markup.
  453. */
  454. function chado_analysis_interpro_view ($node, $teaser = FALSE, $page = FALSE) {
  455. // use drupal's default node view:
  456. if (!$teaser) {
  457. $node = node_prepare($node, $teaser);
  458. // When previewing a node submitting form, it shows 'Array' instead of
  459. // correct date format. We need to format the date here
  460. $time = $node->timeexecuted;
  461. if(is_array($time)){
  462. $month = $time['month'];
  463. $day = $time['day'];
  464. $year = $time['year'];
  465. $timestamp = $year.'-'.$month.'-'.$day;
  466. $node->timeexecuted = $timestamp;
  467. }
  468. // When viewing a node, we need to reformat the analysisprop since we
  469. // separate each value with a bar |
  470. if (preg_match("/.*\|.*/",$node->interprofile)) {
  471. $prop_values = explode("|", $node->interprofile);
  472. $node->interprofile = $prop_values[0];
  473. $node->interproparameters = $prop_values[1];
  474. }
  475. }
  476. return $node;
  477. }
  478. /*******************************************************************************
  479. * Parse Interpro HTML Output file into analysisfeatureprop table
  480. */
  481. function tripal_analysis_interpro_parseHTMLFile ($analysis_id, $interprofile, $parsego, $job_id) {
  482. // Prepare log
  483. $filename = preg_replace("/.*\/(.*)/", "$1", $interprofile);
  484. $logfile = file_directory_path() . "/tripal/tripal_analysis_interpro/load_$filename.log";
  485. $log = fopen($logfile, 'a'); // append parsing results to log file
  486. // Parsing started
  487. print "Parsing File:".$interprofile." ...\n";
  488. fwrite($log, date("D M j G:i:s Y").". Loading $interprofile\n");
  489. // Get cvterm_id for 'analysis_interpro_output_iteration_hits' which is required
  490. // for inserting into the analysisfeatureprop table
  491. $previous_db = tripal_db_set_active('chado'); // use chado database
  492. $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
  493. "INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
  494. "WHERE CVT.name = 'analysis_interpro_output_hit' ".
  495. "AND CV.name = 'tripal'";
  496. $type_id = db_result(db_query($sql));
  497. print "cvterm_id for analysis_interpro_output_iteration_hits is $type_id\n";
  498. // Load the HTML file and convert it into XML for loading
  499. $dom = new domDocument;
  500. $dom->loadHTMLFile($interprofile);
  501. $xml = $dom->saveXML();
  502. $interproput = simplexml_load_string($xml);
  503. // Get html tables for parsing
  504. $tables = $interproput->children()->children();
  505. // Count the number of tables to be processed
  506. $no_iterations = 0;
  507. foreach($tables as $tmp) {
  508. if ($tmp->getName() == 'table') {
  509. $no_iterations ++;
  510. }
  511. }
  512. print "$no_iterations html tables to be processed.\n";
  513. $interval = intval($no_iterations * 0.01);
  514. $idx_iterations = 0;
  515. // Processed the tables
  516. foreach ($tables as $table) {
  517. //if (preg_match('/No hits reported/', $table->asXML()) ) {
  518. //print "skipping this table b/c no hits are reported\n";
  519. //}
  520. // make sure we are looking at a table and its not an empty table
  521. if ($table->getName() == 'table' && !preg_match('/No hits reported/', $table->asXML()) ) {
  522. $idx_iterations ++;
  523. if ($idx_iterations % $interval == 0) {
  524. $percentage = (int) ($idx_iterations / $no_iterations * 100);
  525. tripal_db_set_active($previous_db);
  526. tripal_job_set_progress($job_id, $percentage);
  527. $previous_db = tripal_db_set_active('chado');
  528. print $percentage."% ";
  529. }
  530. // Set job status
  531. // Get the first row and match its name with the feature name
  532. $firsttd = $table->children()->children()->children();
  533. $feature_id = 0;
  534. foreach($firsttd as $b) {
  535. foreach($b->children() as $a) {
  536. if ($a->getName() == 'a') {
  537. // Remove _ORF from the sequence name
  538. $seqname = preg_replace('/^(.+?)_\d_.+/', "$1", $a);
  539. print "seqname is $seqname\n";
  540. // Find out how many features match this uniquename
  541. $sql = "SELECT count(feature_id) FROM {feature} ".
  542. "WHERE uniquename = '%s' ";
  543. $no_features = db_result(db_query($sql, $seqname));
  544. // If there is only one match, get the feature_id
  545. if ($no_features == 1) {
  546. $sql = "SELECT feature_id FROM {feature} ".
  547. "WHERE uniquename = '%s' ";
  548. $feature_id = db_result(db_query($sql, $seqname));
  549. print "\tfeature id is $feature_id\n";
  550. // If the uniquename matches more than one features then skip and print 'Ambiguous'
  551. } else if ($no_features > 1) {
  552. fwrite($log, "Ambiguous: ".$seqname." matches more than one feature and is not processed.\n");
  553. continue;
  554. // If the uniquename did not match, skip and print 'Failed'
  555. } else {
  556. fwrite($log, "Failed: ".$seqname."\n");
  557. }
  558. }
  559. }
  560. }
  561. // Successfully matched. print 'Succeeded'. Add analysis_id and
  562. // feature_id to analysisfeature. Add the table as XML to analysisfeatureprop
  563. if ($feature_id) {
  564. //------------------------------------
  565. // Clease unwanted rows from the table
  566. //------------------------------------
  567. $parent_row = "/<tr><td valign=\"top\"><b>Parent<\/b><\/td>\s*<td valign=\"top\">\s*no.*?parent<\/td>\s*<\/tr>/";
  568. $children_row = "/<tr><td valign=\"top\"><b>Children<\/b><\/td>\s*<td valign=\"top\">\s*no.*?children<\/td>\s*<\/tr>/";
  569. $found_row = "/<tr><td valign=\"top\"><b>Found.*?in<\/b><\/td>\s*<td valign=\"top\">\s*no.*?entries<\/td>\s*<\/tr>/";
  570. $contains_row = "/<tr><td valign=\"top\"><b>Contains<\/b><\/td>\s*<td valign=\"top\">\s*no.*?entries<\/td>\s*<\/tr>/";
  571. $go_row = "/<tr><td valign=\"top\"><b>GO.*?terms<\/b><\/td>\s*<td valign=\"top\">\s*none<\/td>\s*<\/tr>/";
  572. $table_txt = $table->asXML();
  573. $table_txt = preg_replace($parent_row, "", $table_txt);
  574. $table_txt = preg_replace($children_row, "", $table_txt);
  575. $table_txt = preg_replace($found_row, "", $table_txt);
  576. $table_txt = preg_replace($contains_row, "", $table_txt);
  577. $table_txt = preg_replace($go_row, "", $table_txt);
  578. //------------------------------------
  579. // Clease unwanted ORF link from table
  580. //------------------------------------
  581. $orf_link = "/<b><a href=\"\/iprscan\/wget.*?\">(.*?)<\/a><\/b>/";
  582. $table_txt = preg_replace($orf_link, "$1", $table_txt);
  583. //print "----------------------------\n";
  584. //print "old: ".$table->asXML()."\n\n\n";
  585. //print "----------------------------\n";
  586. //print "Fixed: $table_txt\n";
  587. //print "----------------------------\n";
  588. //------------------------------------
  589. // If this feature has already been associated with this analysis, do not reinsert
  590. // Otherwise, Insert into analysisfeature table
  591. //------------------------------------
  592. $sql = "Select analysisfeature_id as id from {analysisfeature} where feature_id = %d and analysis_id = %d";
  593. $analysisfeature = db_fetch_object(db_query($sql, $feature_id, $analysis_id));
  594. if($analysisfeature){ $analysisfeature_id = $analysisfeature->id; }
  595. if(!$analysisfeature_id){
  596. print "inserting analysisfeature\n";
  597. $sql = "INSERT INTO {analysisfeature} (feature_id, analysis_id) ".
  598. "VALUES (%d, %d)";
  599. db_query ($sql, $feature_id, $analysis_id);
  600. $sql = "Select analysisfeature_id from {analysisfeature} where feature_id = %d and analysis_id = %d";
  601. $analysisfeature = db_fetch_object(db_query($sql, $feature_id, $analysis_id));
  602. $analysisfeature_id = $analysisfeature->id;
  603. }
  604. print "analysisfeature_id is $analysisfeature_id (analysis_id = $analysis_id; feature_id = $feature_id)\n";
  605. // Get the higest rank for this feature_id in analysisfeatureprop table.
  606. // If the value of the inserting content is not duplicate, add it to
  607. // analysisfeaturepro with 'higest_rank + 1'
  608. $sql = "SELECT MAX(rank) FROM {analysisfeatureprop} AFP ".
  609. "INNER JOIN analysisfeature AF ON AF.analysisfeature_id = AFP.analysisfeature_id ".
  610. "WHERE feature_id=%d ".
  611. "AND analysis_id=%d ".
  612. "AND type_id=%d ";
  613. $afp = db_fetch_object(db_query($sql, $feature_id, $analysis_id, $type_id));
  614. $hi_rank = 0;
  615. if ($afp) {
  616. $hi_rank = $afp->max + 1;
  617. }
  618. //------------------------------------------------------------
  619. // Insert interpro html tags into analysisfeatureprop table
  620. //------------------------------------------------------------
  621. // Before inserting, make sure it's not a duplicate
  622. $sql = "SELECT value FROM {analysisfeatureprop} WHERE analysisfeature_id = %d AND type_id = %d";
  623. $result = db_query($sql, $analysisfeature_id, $type_id);
  624. $duplicate = 0;
  625. while ($afp_value = db_fetch_object($result)) {
  626. if ($table_txt == $afp_value->value) {
  627. $duplicate = 1;
  628. }
  629. }
  630. if (!$duplicate) {
  631. $sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank)".
  632. "VALUES (%d, %d, '%s', %d)";
  633. db_query($sql, $analysisfeature_id, $type_id, $table_txt, $hi_rank);
  634. fwrite($log, " (Insert)\n"); // write to log
  635. print "\twriting table\n";
  636. } else {
  637. fwrite($log, " (Skipped)\n");
  638. print "\tskipping table - dup\n";
  639. }
  640. // Parse GO terms. Make sure GO database schema is installed in chado
  641. $go_db_id = db_result(db_query("SELECT db_id FROM {db} WHERE name='GO'"));
  642. if (!$go_db_id) {
  643. print 'GO schema not installed in chado. GO terms are not processed.';
  644. }
  645. if ($go_db_id && $parsego) {
  646. $trs = $table->children();
  647. foreach ($trs as $tr) {
  648. $tds = $tr->children();
  649. foreach($tds as $td) {
  650. $gotags = $td->children();
  651. foreach ($gotags as $gotag) {
  652. // Look for 'GO:accession#'
  653. if (preg_match("/^.*?GO:(\d+).*$/", $gotag, $matches)) {
  654. // Find cvterm_id for the matched GO term
  655. $sql = "SELECT cvterm_id FROM {cvterm} CVT
  656. INNER JOIN dbxref DBX ON CVT.dbxref_id = DBX.dbxref_id
  657. WHERE DBX.accession = '%s' AND DBX.db_id = %d";
  658. $goterm_id = db_result(db_query($sql, $matches[1], $go_db_id));
  659. //-------------------------------------------
  660. // Insert GO terms into feature_cvterm table
  661. //-------------------------------------------
  662. // Default pub_id = 1 (NULL) was used
  663. $sql = "INSERT INTO {feature_cvterm} (feature_id, cvterm_id, pub_id)
  664. VALUES (%d, %d, 1)";
  665. db_query($sql, $feature_id, $goterm_id);
  666. //------------------------------------------------
  667. // Insert GO terms into analysisfeatureprop table
  668. //------------------------------------------------
  669. $sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank) ".
  670. "VALUES (%d, %d, '%s', 0)";
  671. db_query($sql, $analysisfeature_id, $goterm_id, $matches[1]);
  672. }
  673. }
  674. }
  675. }
  676. }
  677. }
  678. }
  679. }
  680. tripal_db_set_active ($previous_db); // Use drupal database
  681. print "Done.\nSuccessful and failed entries have been saved in the log file:\n $logfile\n";
  682. fwrite($log, "\n");
  683. fclose($log);
  684. return;
  685. }
  686. /*******************************************************************************
  687. * tripal_analysis_interpro_nodeapi()
  688. * HOOK: Implementation of hook_nodeapi()
  689. * Display interpro results for allowed node types
  690. */
  691. function tripal_analysis_interpro_nodeapi(&$node, $op, $teaser, $page) {
  692. switch ($op) {
  693. case 'view':
  694. // Find out which node types for showing the interpro
  695. $types_to_show = variable_get('tripal_analysis_interpro_setting',
  696. array('chado_feature'));
  697. // Abort if this node is not one of the types we should show.
  698. if (!in_array($node->type, $types_to_show, TRUE)) {
  699. break;
  700. }
  701. // Add interpro to the content item if it's not a teaser
  702. if (!$teaser && $node->feature->feature_id) {
  703. if($node->build_mode == NODE_BUILD_SEARCH_INDEX){
  704. $node->content['tripal_analysis_interpro_index_version'] = array(
  705. '#value' => theme('parse_EBI_Interpro_XML_index_version',$node),
  706. '#weight' => 7,
  707. );
  708. } else {
  709. // Show interpro result if not at teaser view
  710. $node->content['tripal_analysis_interpro_form'] = array(
  711. '#value' => theme('tripal_analysis_interpro_results', $node),
  712. '#weight' => 7
  713. );
  714. }
  715. }
  716. }
  717. }
  718. /************************************************************************
  719. * We need to let drupal know about our theme functions and their arguments.
  720. * We create theme functions to allow users of the module to customize the
  721. * look and feel of the output generated in this module
  722. */
  723. function tripal_analysis_interpro_theme () {
  724. return array(
  725. 'parse_EBI_Interpro_XML_index_version' => array (
  726. 'arguments' => array('node'),
  727. ),
  728. 'tripal_analysis_interpro_results' => array (
  729. 'arguments' => array('node'=> null),
  730. 'template' => 'tripal_analysis_interpro_results',
  731. )
  732. );
  733. }
  734. /*******************************************************************************
  735. * Prepare interpro result for the feature shown on the page
  736. */
  737. //function theme_tripal_analysis_interpro_results ($node) {
  738. // $feature = $node->feature;
  739. // $content = tripal_get_interpro_results($feature->feature_id);
  740. // return $content;
  741. //}
  742. function tripal_analysis_interpro_preprocess_tripal_analysis_interpro_results(&$variables){
  743. $feature = $variables['node']->feature;
  744. $variables['tripal_analysis_interpro']['results'] = tripal_get_interpro_results($feature->feature_id);
  745. }
  746. /*******************************************************************************
  747. * Prepare interpro result for the feature shown on the page
  748. */
  749. function theme_parse_EBI_Interpro_XML_index_version ($node) {
  750. $feature = $node->feature;
  751. $content = tripal_get_interpro_results_index_version($feature->feature_id);
  752. return $content;
  753. }
  754. /*******************************************************************************
  755. * tripal_get_interpro_results()
  756. * Get interpro result from featureprop table for the feature
  757. */
  758. function tripal_get_interpro_results($feature_id){
  759. // Get cvterm_id for 'analysis_interpro_output_hit' which is required
  760. // for inserting into the analysisfeatureprop table
  761. $previous_db = tripal_db_set_active('chado');
  762. $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT
  763. INNER JOIN cv ON cv.cv_id = CVT.cv_id
  764. WHERE CVT.name = 'analysis_interpro_output_hit'
  765. AND CV.name = 'tripal'";
  766. $type_id = db_result(db_query($sql));
  767. // Get analysis times for the feature
  768. $sql = "SELECT A.analysis_id AS aid
  769. FROM {analysis} A
  770. INNER JOIN analysisfeature AF ON A.analysis_id = AF.analysis_id
  771. INNER JOIN analysisfeatureprop AFP ON AF.analysisfeature_id = AFP.analysisfeature_id
  772. WHERE feature_id = %d
  773. AND AFP.type_id = %d
  774. GROUP BY A.analysis_id
  775. ";
  776. $hasResult = db_result(db_query($sql, $feature_id, $type_id));
  777. $result = db_query($sql, $feature_id, $type_id);
  778. // Show interpro result ORDER BY time
  779. if ($hasResult) { // If there is any result, show expandable box
  780. $content .= "<div id=\"interpro-hits\" class=\"tripal_interpro-info-box\">
  781. <div class=\"tripal_expandableBox\">
  782. <h3>InterProScan Analysis</h3>
  783. </div>
  784. <div class=\"tripal_expandableBoxContent\">
  785. <span>
  786. <table class=\"tripal_interpro_results_table\">
  787. <tr><td>";
  788. while ($ana = db_fetch_object($result)) {
  789. // Show analysis date
  790. $sql = "SELECT name, to_char(timeexecuted, 'MM-DD-YYYY') AS time
  791. FROM {analysis}
  792. WHERE analysis_id = %d";
  793. $ana_details = db_fetch_object(db_query($sql, $ana->aid));
  794. // Find node id for the analysis
  795. tripal_db_set_active($previous_db);
  796. $ana_nid = db_result(db_query("SELECT nid FROM {chado_analysis} WHERE analysis_id = %d", $ana->aid));
  797. $ana_url = url("node/".$ana_nid);
  798. $previous_db = tripal_db_set_active('chado');
  799. // Show content
  800. $content .= "<strong>Analysis Date:</strong> $ana_details->time
  801. (<a href=$ana_url>$ana_details->name</a>)";
  802. // Show interpro results
  803. $sql = "SELECT AFP.value AS afpvalue
  804. FROM {analysisfeatureprop} AFP
  805. INNER JOIN analysisfeature AF on AF.analysisfeature_id = AFP.analysisfeature_id
  806. WHERE AF.analysis_id = %d
  807. AND AF.feature_id = %d
  808. ";
  809. $interpro_results = db_query($sql, $ana->aid, $feature_id);
  810. while ($afp = db_fetch_object($interpro_results)) {
  811. $content .= $afp->afpvalue;
  812. }
  813. }
  814. $content .= '</td></tr></table></span></div></div>';
  815. }
  816. tripal_db_set_active($previous_db);
  817. return $content;
  818. }
  819. /*******************************************************************************
  820. * tripal_get_interpro_results()
  821. * Get interpro result from featureprop table for the feature
  822. */
  823. function tripal_get_interpro_results_index_version($feature_id){
  824. // Get cvterm_id for 'analysis_interpro_output_hit' which is required
  825. // for inserting into the analysisfeatureprop table
  826. $previous_db = tripal_db_set_active('chado');
  827. $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
  828. "INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
  829. "WHERE CVT.name = 'analysis_interpro_output_hit' ".
  830. "AND CV.name = 'tripal'";
  831. $type_id = db_result(db_query($sql));
  832. // Get xml string from analysisfeatureprop value column
  833. $sql = "SELECT AFP.value AS afpvalue FROM {analysisfeatureprop} AFP ".
  834. "INNER JOIN analysisfeature AF ON AF.analysisfeature_id = AFP.analysisfeature_id ".
  835. "INNER JOIN analysisprop AP ON AP.analysis_id = AF.analysis_id ".
  836. "WHERE feature_id = %d ".
  837. "AND AFP.type_id = %d ";
  838. $result = db_query($sql, $feature_id, $type_id);
  839. tripal_db_set_active($previous_db);
  840. if (db_result($result)) {
  841. // get the HTML content for viewing each of the XML file
  842. while ($analysisfeatureprop = db_fetch_object($result)) {
  843. $content .= $analysisfeatureprop->afpvalue;
  844. }
  845. }
  846. return $content;
  847. }
  848. /*******************************************************************************
  849. * Tripal Interpro administrative setting form. This function is called by
  850. * tripal_analysis module which asks for an admin form to show on the page
  851. */
  852. function tripal_analysis_interpro_get_settings() {
  853. // Get an array of node types with internal names as keys
  854. $options = node_get_types('names');
  855. // Add 'chado_feature' to allowed content types for showing interpro results
  856. $allowedoptions ['chado_feature'] = "Show Interpro results on feature pages";
  857. $form['description'] = array(
  858. '#type' => 'item',
  859. '#value' => t("Some chado features were analyzed using InterProScan. This option allows user to display the interpro analysis results. Please read user manual for storage and display of interpro files. Check the box to enable the analysis results. Uncheck to disable it."),
  860. '#weight' => 0,
  861. );
  862. $form['tripal_analysis_interpro_setting'] = array(
  863. '#type' => 'checkboxes',
  864. '#options' => $allowedoptions,
  865. '#default_value' => variable_get('tripal_analysis_interpro_setting',
  866. array('chado_feature')),
  867. );
  868. $settings->form = $form;
  869. $settings->title = "Tripal Interpro";
  870. return $settings;
  871. }
  872. /*******************************************************************************
  873. * Set the permission types that the chado module uses. Essentially we
  874. * want permissionis that protect creation, editing and deleting of chado
  875. * data objects
  876. */
  877. function tripal_analysis_interpro_perm(){
  878. return array(
  879. 'access chado_analysis_interpro content',
  880. 'create chado_analysis_interpro content',
  881. 'delete chado_analysis_interpro content',
  882. 'edit chado_analysis_interpro content',
  883. );
  884. }
  885. /*******************************************************************************
  886. * The following function proves access control for users trying to
  887. * perform actions on data managed by this module
  888. */
  889. function chado_analysis_interpro_access($op, $node, $account){
  890. if ($op == 'create') {
  891. return user_access('create chado_analysis_interpro content', $account);
  892. }
  893. if ($op == 'update') {
  894. if (user_access('edit chado_analysis_interpro content', $account)) {
  895. return TRUE;
  896. }
  897. }
  898. if ($op == 'delete') {
  899. if (user_access('delete chado_analysis_interpro content', $account)) {
  900. return TRUE;
  901. }
  902. }
  903. if ($op == 'view') {
  904. if (user_access('access chado_analysis_interpro content', $account)) {
  905. return TRUE;
  906. }
  907. }
  908. return FALSE;
  909. }