tripal_analysis_interpro.module 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955
  1. <?php
  2. // $Id:
  3. // Copyright 2009 Clemson University
  4. /*******************************************************************************
  5. * Tripal Interpro lets users show/hide iprscan results associated with a tripal
  6. * feature
  7. ******************************************************************************/
  8. function tripal_analysis_interpro_init(){
  9. // Add javascript and style sheet
  10. drupal_add_css(drupal_get_path('theme', 'tripal').
  11. '/css/tripal_analysis_interpro.css');
  12. // Add javascript and style sheet
  13. drupal_add_js(drupal_get_path('theme', 'tripal').
  14. '/js/tripal_analysis_interpro.js');
  15. }
  16. /*******************************************************************************
  17. * Provide information to drupal about the node types that we're creating
  18. * in this module
  19. */
  20. function tripal_analysis_interpro_node_info() {
  21. $nodes = array();
  22. $nodes['chado_analysis_interpro'] = array(
  23. 'name' => t('Analysis: Interpro'),
  24. 'module' => 'chado_analysis_interpro',
  25. 'description' => t('An interpro analysis from the chado database'),
  26. 'has_title' => FALSE,
  27. 'title_label' => t('Analysis: Interpro'),
  28. 'has_body' => FALSE,
  29. 'body_label' => t('Interpro Analysis Description'),
  30. 'locked' => TRUE
  31. );
  32. return $nodes;
  33. }
  34. /*******************************************************************************
  35. * Provide a Interpro Analysis form
  36. */
  37. function chado_analysis_interpro_form ($node){
  38. $type = node_get_types('type', $node);
  39. $form = array();
  40. $form['title']= array(
  41. '#type' => 'hidden',
  42. '#default_value' => $node->title,
  43. );
  44. $form['analysisname']= array(
  45. '#type' => 'textfield',
  46. '#title' => t('Analysis Name'),
  47. '#required' => FALSE,
  48. '#default_value' => $node->analysisname,
  49. '#weight' => 1
  50. );
  51. $form['program']= array(
  52. '#type' => 'textfield',
  53. '#title' => t('Program'),
  54. '#required' => TRUE,
  55. '#default_value' => $node->program,
  56. '#weight' => 2
  57. );
  58. $form['programversion']= array(
  59. '#type' => 'textfield',
  60. '#title' => t('Program Version'),
  61. '#required' => TRUE,
  62. '#default_value' => $node->programversion,
  63. '#weight' => 3
  64. );
  65. $form['algorithm']= array(
  66. '#type' => 'textfield',
  67. '#title' => t('Algorithm'),
  68. '#required' => FALSE,
  69. '#default_value' => $node->algorithm,
  70. '#weight' => 4
  71. );
  72. $form['sourcename']= array(
  73. '#type' => 'textfield',
  74. '#title' => t('Source Name'),
  75. '#required' => FALSE,
  76. '#default_value' => $node->sourcename,
  77. '#weight' => 5
  78. );
  79. $form['sourceversion']= array(
  80. '#type' => 'textfield',
  81. '#title' => t('Source Version'),
  82. '#required' => FALSE,
  83. '#default_value' => $node->sourceversion,
  84. '#weight' => 6
  85. );
  86. $form['sourceuri']= array(
  87. '#type' => 'textfield',
  88. '#title' => t('Source URI'),
  89. '#required' => FALSE,
  90. '#default_value' => $node->sourceuri,
  91. '#weight' => 7
  92. );
  93. // Get time saved in chado
  94. $default_time = $node->timeexecuted;
  95. $year = preg_replace("/^(\d+)-\d+-\d+ .*/", "$1", $default_time);
  96. $month = preg_replace("/^\d+-0?(\d+)-\d+ .*/", "$1", $default_time);
  97. $day = preg_replace("/^\d+-\d+-0?(\d+) .*/", "$1", $default_time);
  98. // If the time is not set, use current time
  99. if (!$default_time) {
  100. $default_time = time();
  101. $year = format_date($default_time, 'custom', 'Y');
  102. $month = format_date($default_time, 'custom', 'n');
  103. $day = format_date($default_time, 'custom', 'j');
  104. }
  105. $form['timeexecuted']= array(
  106. '#type' => 'date',
  107. '#title' => t('Time Executed'),
  108. '#required' => TRUE,
  109. '#default_value' => array(
  110. 'year' => $year,
  111. 'month' => $month,
  112. 'day' => $day,
  113. ),
  114. '#weight' => 8
  115. );
  116. $form['description']= array(
  117. '#type' => 'textarea',
  118. '#rows' => 15,
  119. '#title' => t('Description and/or Program Settings'),
  120. '#required' => FALSE,
  121. '#default_value' => check_plain($node->description),
  122. '#weight' => 9
  123. );
  124. //----InterProScan Settings (Shown only when Tripal Interpro is enabled) ----
  125. if (preg_match("/.*\|.*/",$node->interprofile)) {
  126. $prop_values = explode("|", $node->interprofile);
  127. $node->interprofile = $prop_values[0];
  128. $node->interproparameters = $prop_values[1];
  129. }
  130. $moreSettings ['interpro'] = 'Interpro Settings';
  131. $form['interpro'] = array(
  132. '#title' => t('Interpro Settings'),
  133. '#type' => 'fieldset',
  134. '#description' => t('Specific Settings for Interpro Analysis.'),
  135. '#collapsible' => TRUE,
  136. '#attributes' => array('id' => 'interpro-extra-settings'),
  137. '#weight' => 11
  138. );
  139. $form['interpro']['interprofile'] = array(
  140. '#title' => t('Interproscan Output File (in html format)'),
  141. '#type' => 'textfield',
  142. '#description' => t('The html output file generated by Interproscan in full path.'),
  143. '#default_value' => $node->interprofile,
  144. );
  145. $form['interpro']['interprojob'] = array(
  146. '#type' => 'checkbox',
  147. '#title' => t('Submit a job to parse the Interpro html output'),
  148. '#description' => t('Note: features associated with the interpro results must '.
  149. 'exist in chado before parsing the file. Otherwise, interpro '.
  150. 'results that cannot be linked to a feature will be '.
  151. 'discarded. Also, Triapl Interpro module needs to be enabled.'),
  152. '#default_value' => $node->interprojob,
  153. '#attributes' => array(
  154. 'onclick' => 'return isSubmittingJob(this)'
  155. )
  156. );
  157. $form['interpro']['parsego'] = array(
  158. '#type' => 'checkbox',
  159. '#title' => t('Load GO terms to the database'),
  160. '#description' => t('Check the box to load GO terms to chado database'),
  161. '#default_value' => $node->parsego
  162. );
  163. $form['interpro']['interproparameters'] = array(
  164. '#title' => t('Parameters'),
  165. '#type' => 'textfield',
  166. '#description' => t('The parameters for running the interpro analysis.'),
  167. '#default_value' => $node->interproparameters,
  168. );
  169. return $form;
  170. }
  171. /*******************************************************************************
  172. *
  173. */
  174. function chado_analysis_interpro_insert($node){
  175. global $user;
  176. // Create a timestamp so we can insert it into the chado database
  177. $time = $node->timeexecuted;
  178. $month = $time['month'];
  179. $day = $time['day'];
  180. $year = $time['year'];
  181. $timestamp = $month.'/'.$day.'/'.$year;
  182. // If this analysis already exists then don't recreate it in chado
  183. $analysis_id = $node->analysis_id;
  184. if ($analysis_id) {
  185. $sql = "SELECT analysis_id ".
  186. "FROM {Analysis} ".
  187. "WHERE analysis_id = %d ";
  188. $previous_db = tripal_db_set_active('chado');
  189. $analysis = db_fetch_object(db_query($sql, $node->analysis_id));
  190. tripal_db_set_active($previous_db);
  191. }
  192. // If the analysis doesn't exist then let's create it in chado.
  193. if(!$analysis){
  194. // First add the item to the chado analysis table
  195. $sql = "INSERT INTO {analysis} ".
  196. " (name, description, program, programversion, algorithm, ".
  197. " sourcename, sourceversion, sourceuri, timeexecuted) ".
  198. "VALUES ('%s','%s','%s','%s','%s','%s','%s','%s','%s')";
  199. $previous_db = tripal_db_set_active('chado'); // use chado database
  200. db_query($sql,$node->analysisname, $node->description,
  201. $node->program,$node->programversion,$node->algorithm,
  202. $node->sourcename, $node->sourceversion, $node->sourceuri,
  203. $timestamp);
  204. // find the newly entered analysis_id
  205. $sql = "SELECT analysis_id ".
  206. "FROM {Analysis} ".
  207. "WHERE program='%s'".
  208. "AND programversion='%s'".
  209. "AND sourcename='%s'";
  210. $analysis_id = db_result(db_query($sql, $node->program,
  211. $node->programversion, $node->sourcename));
  212. // Get cvterm_id for 'analysis_interpro_settings'
  213. $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
  214. "INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
  215. "WHERE CVT.name = 'analysis_interpro_settings' ".
  216. "AND CV.name = 'tripal'";
  217. $type_id = db_result(db_query($sql));
  218. // Insert into chado {analysisprop} table
  219. $sql = "INSERT INTO {analysisprop} (analysis_id, type_id, value) ".
  220. "VALUES (%d, %d, '%s')";
  221. $interprosettings = $node->interprofile."|".$node->interproparameters;
  222. db_query($sql, $analysis_id, $type_id, $interprosettings);
  223. tripal_db_set_active($previous_db); // switch back to drupal database
  224. // Add a job if the user wants to parse the html output
  225. if($node->interprojob) {
  226. $job_args[0] = $analysis_id;
  227. $job_args[1] = $node->interprofile;
  228. if ($node->parsego) {
  229. $job_args[2] = 1;
  230. } else {
  231. $job_args[2] = 0;
  232. }
  233. if (is_readable($node->interprofile)) {
  234. $fname = preg_replace("/.*\/(.*)/", "$1", $node->interprofile);
  235. tripal_add_job("Parse interpro: $fname",'tripal_analysis_interpro',
  236. 'tripal_analysis_interpro_parseHTMLFile', $job_args, $user->uid);
  237. } else {
  238. drupal_set_message("Can not open interpro output file. Job not scheduled.");
  239. }
  240. }
  241. }
  242. // Make sure the entry for this analysis doesn't already exist in the
  243. // chado_analysis table if it doesn't exist then we want to add it.
  244. $node_check_sql = "SELECT * FROM {chado_analysis} ".
  245. "WHERE analysis_id = %d";
  246. $node_check = db_fetch_object(db_query($node_check_sql, $analysis_id));
  247. if(!$node_check){
  248. // next add the item to the drupal table
  249. $sql = "INSERT INTO {chado_analysis} (nid, vid, analysis_id) ".
  250. "VALUES (%d, %d, %d)";
  251. db_query($sql,$node->nid,$node->vid,$analysis_id);
  252. // Create a title for the analysis node using the unique keys so when the
  253. // node is saved, it will have a title
  254. $record = new stdClass();
  255. // If the analysis has a name, use it as the node title. If not, construct
  256. // the title using program, programversion, and sourcename
  257. if ($node->analysisname) {
  258. $record->title = $node->analysisname;
  259. } else {
  260. //Construct node title as "program (version)
  261. $record->title = "$node->program ($node->programversion)";
  262. }
  263. $record->nid = $node->nid;
  264. drupal_write_record('node',$record,'nid');
  265. drupal_write_record('node_revisions',$record,'nid');
  266. }
  267. }
  268. /*******************************************************************************
  269. * Delete interpro anlysis
  270. */
  271. function chado_analysis_interpro_delete($node){
  272. // Before removing, get analysis_id so we can remove it from chado database
  273. // later
  274. $sql_drupal = "SELECT analysis_id ".
  275. "FROM {chado_analysis} ".
  276. "WHERE nid = %d ".
  277. "AND vid = %d";
  278. $analysis_id = db_result(db_query($sql_drupal, $node->nid, $node->vid));
  279. // Remove data from the {chado_analysis}, {node}, and {node_revisions} tables
  280. $sql_del = "DELETE FROM {chado_analysis} ".
  281. "WHERE nid = %d ".
  282. "AND vid = %d";
  283. db_query($sql_del, $node->nid, $node->vid);
  284. $sql_del = "DELETE FROM {node} ".
  285. "WHERE nid = %d ".
  286. "AND vid = %d";
  287. db_query($sql_del, $node->nid, $node->vid);
  288. $sql_del = "DELETE FROM {node_revisions} ".
  289. "WHERE nid = %d ".
  290. "AND vid = %d";
  291. db_query($sql_del, $node->nid, $node->vid);
  292. //Remove from analysisfeatureprop, analysisfeature, analysis, and analysisprop tables
  293. $previous_db = tripal_db_set_active('chado');
  294. $sql = "SELECT analysisfeature_id FROM {analysisfeature} WHERE analysis_id=%d";
  295. $results = db_query($sql, $analysis_id);
  296. while ($af = db_fetch_object($results)) {
  297. db_query("DELETE FROM {analysisfeatureprop} WHERE analysisfeature_id = %d", $af->analysisfeature_id);
  298. }
  299. db_query("DELETE FROM {analysisfeature} WHERE analysis_id = %d", $analysis_id);
  300. db_query("DELETE FROM {analysisprop} WHERE analysis_id = %d", $analysis_id);
  301. db_query("DELETE FROM {analysis} WHERE analysis_id = %d", $analysis_id);
  302. tripal_db_set_active($previous_db);
  303. }
  304. /*******************************************************************************
  305. * Update interpro analysis
  306. */
  307. function chado_analysis_interpro_update($node){
  308. global $user;
  309. if($node->revision){
  310. // TODO -- decide what to do about revisions
  311. } else {
  312. // Create a timestamp so we can insert it into the chado database
  313. $time = $node->timeexecuted;
  314. $month = $time['month'];
  315. $day = $time['day'];
  316. $year = $time['year'];
  317. $timestamp = $month.'/'.$day.'/'.$year;
  318. // get the analysis_id for this node:
  319. $sql = "SELECT analysis_id ".
  320. "FROM {chado_analysis} ".
  321. "WHERE vid = %d";
  322. $analysis_id = db_fetch_object(db_query($sql, $node->vid))->analysis_id;
  323. $sql = "UPDATE {analysis} ".
  324. "SET name = '%s', ".
  325. " description = '%s', ".
  326. " program = '%s', ".
  327. " programversion = '%s', ".
  328. " algorithm = '%s', ".
  329. " sourcename = '%s', ".
  330. " sourceversion = '%s', ".
  331. " sourceuri = '%s', ".
  332. " timeexecuted = '%s' ".
  333. "WHERE analysis_id = %d ";
  334. $previous_db = tripal_db_set_active('chado'); // use chado database
  335. db_query($sql, $node->analysisname, $node->description, $node->program,
  336. $node->programversion,$node->algorithm,$node->sourcename,
  337. $node->sourceversion, $node->sourceuri, $timestamp, $analysis_id);
  338. // Get cvterm_id for 'analysis_interpro_settings'
  339. $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
  340. "INNER JOIN cv CV ON CV.cv_id = CVT.cv_id ".
  341. "WHERE CVT.name = 'analysis_interpro_settings' ".
  342. "AND CV.name = 'tripal'";
  343. $type_id = db_result(db_query($sql));
  344. $sql = "UPDATE {analysisprop} ".
  345. "SET value = '%s' ".
  346. "WHERE analysis_id = %d AND type_id = %d";
  347. $interprosettings = $node->interprofile."|".$node->interproparameters;
  348. db_query($sql, $interprosettings, $analysis_id, $type_id);
  349. tripal_db_set_active($previous_db); // switch back to drupal database
  350. // Add a job if the user wants to parse the html output
  351. if($node->interprojob) {
  352. $job_args[0] = $analysis_id;
  353. $job_args[1] = $node->interprofile;
  354. if ($node->parsego) {
  355. $job_args[2] = 1;
  356. } else {
  357. $job_args[2] = 0;
  358. }
  359. if (is_readable($node->interprofile)) {
  360. $fname = preg_replace("/.*\/(.*)/", "$1", $node->interprofile);
  361. tripal_add_job("Parse interpro: $fname",'tripal_analysis_interpro',
  362. 'tripal_analysis_interpro_parseHTMLFile', $job_args, $user->uid);
  363. } else {
  364. drupal_set_message("Can not open interpro output file. Job not scheduled.");
  365. }
  366. }
  367. // Create a title for the analysis node using the unique keys so when the
  368. // node is saved, it will have a title
  369. $record = new stdClass();
  370. // If the analysis has a name, use it as the node title. If not, construct
  371. // the title using program, programversion, and sourcename
  372. if ($node->analysisname) {
  373. $record->title = $node->analysisname;
  374. } else {
  375. //Construct node title as "program (version)
  376. $record->title = "$node->program ($node->programversion)";
  377. }
  378. $record->nid = $node->nid;
  379. drupal_write_record('node',$record,'nid');
  380. drupal_write_record('node_revisions',$record,'nid');
  381. }
  382. }
  383. /*******************************************************************************
  384. * When a node is requested by the user this function is called to allow us
  385. * to add auxiliary data to the node object.
  386. */
  387. function chado_analysis_interpro_load($node){
  388. // get the analysis_id for this node:
  389. $sql = "SELECT analysis_id FROM {chado_analysis} WHERE vid = %d";
  390. $ana_node = db_fetch_object(db_query($sql, $node->vid));
  391. $additions = new stdClass();
  392. if ($ana_node) {
  393. // get analysis information
  394. $sql = "SELECT Analysis_id, name AS analysisname, description, program, ".
  395. " programversion, algorithm, sourcename, sourceversion, ".
  396. " sourceuri, timeexecuted ".
  397. "FROM {Analysis} ".
  398. "WHERE Analysis_id = $ana_node->analysis_id";
  399. $previous_db = tripal_db_set_active('chado'); // use chado database
  400. $additions = db_fetch_object(db_query($sql));
  401. // get cvterm_id for 'analysis_interpro_settings'
  402. $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
  403. "INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
  404. "WHERE CVT.name = 'analysis_interpro_settings' ".
  405. "AND CV.name = 'tripal'";
  406. $type_id = db_result(db_query($sql));
  407. // get analysisprop information
  408. $sql = "SELECT value FROM {analysisprop} ".
  409. "WHERE analysis_id = %d ".
  410. "AND type_id = %d";
  411. $analysisprop = db_result(db_query($sql, $ana_node->analysis_id, $type_id));
  412. $prop_values = explode ("|", $analysisprop, 1);
  413. $additions->interprofile = $prop_values[0];
  414. $additions->interproparameters = $prop_values[1];
  415. tripal_db_set_active($previous_db); // now use drupal database
  416. }
  417. // If the analysis has a name, use it as the node title. If not, construct
  418. // the title using program programversion, and sourcename
  419. if ($additions->analysisname) {
  420. $additions->title = $additions->analysisname;
  421. } else {
  422. // Construct node title as "program version (source)
  423. $additions->title = "$additions->program ($additions->programversion)";
  424. }
  425. return $additions;
  426. }
  427. /*******************************************************************************
  428. * This function customizes the view of the chado_analysis node. It allows
  429. * us to generate the markup.
  430. */
  431. function chado_analysis_interpro_view ($node, $teaser = FALSE, $page = FALSE) {
  432. // use drupal's default node view:
  433. if (!$teaser) {
  434. $node = node_prepare($node, $teaser);
  435. // When previewing a node submitting form, it shows 'Array' instead of
  436. // correct date format. We need to format the date here
  437. $time = $node->timeexecuted;
  438. if(is_array($time)){
  439. $month = $time['month'];
  440. $day = $time['day'];
  441. $year = $time['year'];
  442. $timestamp = $year.'-'.$month.'-'.$day;
  443. $node->timeexecuted = $timestamp;
  444. }
  445. // When viewing a node, we need to reformat the analysisprop since we
  446. // separate each value with a bar |
  447. if (preg_match("/.*\|.*/",$node->interprofile)) {
  448. $prop_values = explode("|", $node->interprofile);
  449. $node->interprofile = $prop_values[0];
  450. $node->interproparameters = $prop_values[1];
  451. }
  452. }
  453. return $node;
  454. }
  455. /*******************************************************************************
  456. * Parse Interpro HTML Output file into analysisfeatureprop table
  457. */
  458. function tripal_analysis_interpro_parseHTMLFile ($analysis_id, $interprofile, $parsego, $job_id) {
  459. // Prepare log
  460. $filename = preg_replace("/.*\/(.*)/", "$1", $interprofile);
  461. $logfile = file_directory_path() . "/tripal/tripal_analysis_interpro/load_$filename.log";
  462. $log = fopen($logfile, 'a'); // append parsing results to log file
  463. // Parsing started
  464. print "Parsing File:".$interprofile." ...\n";
  465. fwrite($log, date("D M j G:i:s Y").". Loading $interprofile\n");
  466. // Get cvterm_id for 'analysis_interpro_output_iteration_hits' which is required
  467. // for inserting into the analysisfeatureprop table
  468. $previous_db = tripal_db_set_active('chado'); // use chado database
  469. $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
  470. "INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
  471. "WHERE CVT.name = 'analysis_interpro_output_hit' ".
  472. "AND CV.name = 'tripal'";
  473. $type_id = db_result(db_query($sql));
  474. print "cvterm_id for analysis_interpro_output_iteration_hits is $type_id\n";
  475. // Load the HTML file and convert it into XML for loading
  476. $dom = new domDocument;
  477. $dom->loadHTMLFile($interprofile);
  478. $xml = $dom->saveXML();
  479. $interproput = simplexml_load_string($xml);
  480. // Get html tables for parsing
  481. $tables = $interproput->children()->children();
  482. // Count the number of tables to be processed
  483. $no_iterations = 0;
  484. foreach($tables as $tmp) {
  485. if ($tmp->getName() == 'table') {
  486. $no_iterations ++;
  487. }
  488. }
  489. print "$no_iterations html tables to be processed.\n";
  490. $interval = intval($no_iterations * 0.01);
  491. $idx_iterations = 0;
  492. // Processed the tables
  493. foreach ($tables as $table) {
  494. //if (preg_match('/No hits reported/', $table->asXML()) ) {
  495. //print "skipping this table b/c no hits are reported\n";
  496. //}
  497. // make sure we are looking at a table and its not an empty table
  498. if ($table->getName() == 'table' && !preg_match('/No hits reported/', $table->asXML()) ) {
  499. $idx_iterations ++;
  500. if ($idx_iterations % $interval == 0) {
  501. $percentage = (int) ($idx_iterations / $no_iterations * 100);
  502. tripal_db_set_active($previous_db);
  503. tripal_job_set_progress($job_id, $percentage);
  504. $previous_db = tripal_db_set_active('chado');
  505. print $percentage."% ";
  506. }
  507. // Set job status
  508. // Get the first row and match its name with the feature name
  509. $firsttd = $table->children()->children()->children();
  510. $feature_id = 0;
  511. foreach($firsttd as $b) {
  512. foreach($b->children() as $a) {
  513. if ($a->getName() == 'a') {
  514. // Remove _ORF from the sequence name
  515. $seqname = preg_replace('/^(.+?)_\d_.+/', "$1", $a);
  516. print "seqname is $seqname\n";
  517. // Find out how many features match this uniquename
  518. $sql = "SELECT count(feature_id) FROM {feature} ".
  519. "WHERE uniquename = '%s' ";
  520. $no_features = db_result(db_query($sql, $seqname));
  521. // If there is only one match, get the feature_id
  522. if ($no_features == 1) {
  523. $sql = "SELECT feature_id FROM {feature} ".
  524. "WHERE uniquename = '%s' ";
  525. $feature_id = db_result(db_query($sql, $seqname));
  526. print "\tfeature id is $feature_id\n";
  527. // If the uniquename matches more than one features then skip and print 'Ambiguous'
  528. } else if ($no_features > 1) {
  529. fwrite($log, "Ambiguous: ".$seqname." matches more than one feature and is not processed.\n");
  530. continue;
  531. // If the uniquename did not match, skip and print 'Failed'
  532. } else {
  533. fwrite($log, "Failed: ".$seqname."\n");
  534. }
  535. }
  536. }
  537. }
  538. // Successfully matched. print 'Succeeded'. Add analysis_id and
  539. // feature_id to analysisfeature. Add the table as XML to analysisfeatureprop
  540. if ($feature_id) {
  541. //------------------------------------
  542. // Clease unwanted rows from the table
  543. //------------------------------------
  544. $parent_row = "/<tr><td valign=\"top\"><b>Parent<\/b><\/td>\s*<td valign=\"top\">\s*no.*?parent<\/td>\s*<\/tr>/";
  545. $children_row = "/<tr><td valign=\"top\"><b>Children<\/b><\/td>\s*<td valign=\"top\">\s*no.*?children<\/td>\s*<\/tr>/";
  546. $found_row = "/<tr><td valign=\"top\"><b>Found.*?in<\/b><\/td>\s*<td valign=\"top\">\s*no.*?entries<\/td>\s*<\/tr>/";
  547. $contains_row = "/<tr><td valign=\"top\"><b>Contains<\/b><\/td>\s*<td valign=\"top\">\s*no.*?entries<\/td>\s*<\/tr>/";
  548. $go_row = "/<tr><td valign=\"top\"><b>GO.*?terms<\/b><\/td>\s*<td valign=\"top\">\s*none<\/td>\s*<\/tr>/";
  549. $table_txt = $table->asXML();
  550. $table_txt = preg_replace($parent_row, "", $table_txt);
  551. $table_txt = preg_replace($children_row, "", $table_txt);
  552. $table_txt = preg_replace($found_row, "", $table_txt);
  553. $table_txt = preg_replace($contains_row, "", $table_txt);
  554. $table_txt = preg_replace($go_row, "", $table_txt);
  555. //------------------------------------
  556. // Clease unwanted ORF link from table
  557. //------------------------------------
  558. $orf_link = "/<b><a href=\"\/iprscan\/wget.*?\">(.*?)<\/a><\/b>/";
  559. $table_txt = preg_replace($orf_link, "$1", $table_txt);
  560. //print "----------------------------\n";
  561. //print "old: ".$table->asXML()."\n\n\n";
  562. //print "----------------------------\n";
  563. //print "Fixed: $table_txt\n";
  564. //print "----------------------------\n";
  565. //------------------------------------
  566. // If this feature has already been associated with this analysis, do not reinsert
  567. // Otherwise, Insert into analysisfeature table
  568. //------------------------------------
  569. $sql = "Select analysisfeature_id as id from {analysisfeature} where feature_id = %d and analysis_id = %d";
  570. $analysisfeature = db_fetch_object(db_query($sql, $feature_id, $analysis_id));
  571. if($analysisfeature){ $analysisfeature_id = $analysisfeature->id; }
  572. if(!$analysisfeature_id){
  573. print "inserting analysisfeature\n";
  574. $sql = "INSERT INTO {analysisfeature} (feature_id, analysis_id) ".
  575. "VALUES (%d, %d)";
  576. db_query ($sql, $feature_id, $analysis_id);
  577. $sql = "Select analysisfeature_id from {analysisfeature} where feature_id = %d and analysis_id = %d";
  578. $analysisfeature = db_fetch_object(db_query($sql, $feature_id, $analysis_id));
  579. $analysisfeature_id = $analysisfeature->id;
  580. }
  581. print "analysisfeature_id is $analysisfeature_id (analysis_id = $analysis_id; feature_id = $feature_id)\n";
  582. // Get the higest rank for this feature_id in analysisfeatureprop table.
  583. // If the value of the inserting content is not duplicate, add it to
  584. // analysisfeaturepro with 'higest_rank + 1'
  585. $sql = "SELECT MAX(rank) FROM {analysisfeatureprop} AFP ".
  586. "INNER JOIN analysisfeature AF ON AF.analysisfeature_id = AFP.analysisfeature_id ".
  587. "WHERE feature_id=%d ".
  588. "AND analysis_id=%d ".
  589. "AND type_id=%d ";
  590. $afp = db_fetch_object(db_query($sql, $feature_id, $analysis_id, $type_id));
  591. $hi_rank = 0;
  592. if ($afp) {
  593. $hi_rank = $afp->max + 1;
  594. }
  595. //------------------------------------------------------------
  596. // Insert interpro html tags into analysisfeatureprop table
  597. //------------------------------------------------------------
  598. // Before inserting, make sure it's not a duplicate
  599. $sql = "SELECT value FROM {analysisfeatureprop} WHERE analysisfeature_id = %d AND type_id = %d";
  600. $result = db_query($sql, $analysisfeature_id, $type_id);
  601. $duplicate = 0;
  602. while ($afp_value = db_fetch_object($result)) {
  603. if ($table_txt == $afp_value->value) {
  604. $duplicate = 1;
  605. }
  606. }
  607. if (!$duplicate) {
  608. $sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank)".
  609. "VALUES (%d, %d, '%s', %d)";
  610. db_query($sql, $analysisfeature_id, $type_id, $table_txt, $hi_rank);
  611. fwrite($log, " (Insert)\n"); // write to log
  612. print "\twriting table\n";
  613. } else {
  614. fwrite($log, " (Skipped)\n");
  615. print "\tskipping table - dup\n";
  616. }
  617. // Parse GO terms. Make sure GO database schema is installed in chado
  618. $go_db_id = db_result(db_query("SELECT db_id FROM {db} WHERE name='GO'"));
  619. if (!$go_db_id) {
  620. print 'GO schema not installed in chado. GO terms are not processed.';
  621. }
  622. if ($go_db_id && $parsego) {
  623. $trs = $table->children();
  624. foreach ($trs as $tr) {
  625. $tds = $tr->children();
  626. foreach($tds as $td) {
  627. $gotags = $td->children();
  628. foreach ($gotags as $gotag) {
  629. // Look for 'GO:accession#'
  630. if (preg_match("/^.*?GO:(\d+).*$/", $gotag, $matches)) {
  631. // Find cvterm_id for the matched GO term
  632. $sql = "SELECT cvterm_id FROM {cvterm} CVT
  633. INNER JOIN dbxref DBX ON CVT.dbxref_id = DBX.dbxref_id
  634. WHERE DBX.accession = '%s' AND DBX.db_id = %d";
  635. $goterm_id = db_result(db_query($sql, $matches[1], $go_db_id));
  636. //-------------------------------------------
  637. // Insert GO terms into feature_cvterm table
  638. //-------------------------------------------
  639. // Default pub_id = 1 (NULL) was used
  640. $sql = "INSERT INTO {feature_cvterm} (feature_id, cvterm_id, pub_id)
  641. VALUES (%d, %d, 1)";
  642. db_query($sql, $feature_id, $goterm_id);
  643. //------------------------------------------------
  644. // Insert GO terms into analysisfeatureprop table
  645. //------------------------------------------------
  646. $sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank) ".
  647. "VALUES (%d, %d, '%s', 0)";
  648. db_query($sql, $analysisfeature_id, $goterm_id, $matches[1]);
  649. }
  650. }
  651. }
  652. }
  653. }
  654. }
  655. }
  656. }
  657. tripal_db_set_active ($previous_db); // Use drupal database
  658. print "Done.\nSuccessful and failed entries have been saved in the log file:\n $logfile\n";
  659. fwrite($log, "\n");
  660. fclose($log);
  661. return;
  662. }
  /*******************************************************************************
   * tripal_analysis_interpro_nodeapi()
   * HOOK: Implementation of hook_nodeapi()
   *
   * On the 'view' operation, attaches InterPro output to $node->content for
   * node types enabled in the 'tripal_analysis_interpro_setting' variable.
   * Nothing is added for teasers or for nodes that carry no feature_id.
   *
   * @param $node
   *   The node being acted on; modified in place.
   * @param $op
   *   The nodeapi operation. Only 'view' is handled.
   * @param $teaser
   *   TRUE when a teaser is being built.
   * @param $page
   *   Not used by this implementation.
   */
  function tripal_analysis_interpro_nodeapi(&$node, $op, $teaser, $page) {
    switch ($op) {
      case 'view':
        // Node types for which interpro results should be shown.
        $types_to_show = variable_get('tripal_analysis_interpro_setting',
          array('chado_feature'));

        // Abort if this node is not one of the types we should show. The
        // comparison is strict so that a non-string entry in the setting
        // (presumably 0 for an unchecked box -- confirm) never matches a
        // node type name.
        if (!in_array($node->type, $types_to_show, TRUE)) {
          break;
        }

        // Skip teasers, and skip nodes that have no feature attached. empty()
        // avoids a notice when $node->feature is not set at all.
        if ($teaser || empty($node->feature->feature_id)) {
          break;
        }

        if (isset($node->build_mode) && $node->build_mode == NODE_BUILD_SEARCH_INDEX) {
          // Search indexing gets the bare stored values.
          $node->content['tripal_analysis_interpro_index_version'] = array(
            '#value' => theme('parse_EBI_Interpro_XML_index_version', $node),
            '#weight' => 7,
          );
        }
        else {
          // Regular page view gets the full results box.
          $node->content['tripal_analysis_interpro_form'] = array(
            '#value' => theme('tripal_analysis_interpro_results', $node),
            '#weight' => 7,
          );
        }
        break;
    }
  }
  /************************************************************************
   * HOOK: Implementation of hook_theme()
   *
   * We need to let drupal know about our theme functions and their arguments.
   * We create theme functions to allow users of the module to customize the
   * look and feel of the output generated in this module.
   *
   * @return
   *   Theme registry entries keyed by theme hook name. Each 'arguments' entry
   *   maps the argument name to its default value.
   */
  function tripal_analysis_interpro_theme () {
    return array(
      'parse_EBI_Interpro_XML_index_version' => array(
        'arguments' => array('node' => NULL),
      ),
      'tripal_analysis_interpro_results' => array(
        'arguments' => array('node' => NULL),
      ),
    );
  }
  /*******************************************************************************
   * Theme function: interpro results for the feature attached to a node.
   *
   * @param $node
   *   A node object whose ->feature->feature_id identifies the feature.
   * @return
   *   Whatever tripal_get_interpro_results() returns for that feature.
   */
  function theme_tripal_analysis_interpro_results ($node) {
    return tripal_get_interpro_results($node->feature->feature_id);
  }
  /*******************************************************************************
   * Theme function: search-index variant of the interpro results for the
   * feature attached to a node.
   *
   * @param $node
   *   A node object whose ->feature->feature_id identifies the feature.
   * @return
   *   Whatever tripal_get_interpro_results_index_version() returns for that
   *   feature.
   */
  function theme_parse_EBI_Interpro_XML_index_version ($node) {
    return tripal_get_interpro_results_index_version($node->feature->feature_id);
  }
  /*******************************************************************************
   * tripal_get_interpro_results()
   * Build the "InterProScan Analysis" box for a feature.
   *
   * Finds every analysis that has an 'analysis_interpro_output_hit' property for
   * the feature. For each one it emits the analysis date, a link to the analysis
   * node and the analysisfeatureprop values stored for that analysis/feature.
   *
   * @param $feature_id
   *   The chado feature_id to look up.
   * @return
   *   The HTML for the results box, or an empty string when the feature has no
   *   interpro results.
   */
  function tripal_get_interpro_results($feature_id){
    $content = '';

    // ---- Part 1: collect everything we need from the chado database ----
    $previous_db = tripal_db_set_active('chado');

    // cvterm_id of 'analysis_interpro_output_hit', used to find the analyses
    // that produced interpro output for this feature.
    $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT
            INNER JOIN cv ON cv.cv_id = CVT.cv_id
            WHERE CVT.name = 'analysis_interpro_output_hit'
            AND CV.name = 'tripal'";
    $type_id = db_result(db_query($sql));

    // One row per analysis that has interpro output for the feature.
    $sql = "SELECT A.analysis_id AS aid
            FROM {analysis} A
            INNER JOIN analysisfeature AF ON A.analysis_id = AF.analysis_id
            INNER JOIN analysisfeatureprop AFP ON AF.analysisfeature_id = AFP.analysisfeature_id
            WHERE feature_id = %d
            AND AFP.type_id = %d
            GROUP BY A.analysis_id
            ";
    $result = db_query($sql, $feature_id, $type_id);

    $analyses = array();
    while ($ana = db_fetch_object($result)) {
      // Analysis name and execution date.
      $sql = "SELECT name, to_char(timeexecuted, 'MM-DD-YYYY') AS time
              FROM {analysis}
              WHERE analysis_id = %d";
      $details = db_fetch_object(db_query($sql, $ana->aid));

      // Stored result values for this analysis/feature pair.
      // NOTE(review): unlike the query above, this one does not filter on
      // AFP.type_id, so every property of the analysisfeature is printed --
      // confirm whether that is intended.
      $sql = "SELECT AFP.value AS afpvalue
              FROM {analysisfeatureprop} AFP
              INNER JOIN analysisfeature AF on AF.analysisfeature_id = AFP.analysisfeature_id
              WHERE AF.analysis_id = %d
              AND AF.feature_id = %d
              ";
      $hits = array();
      $interpro_results = db_query($sql, $ana->aid, $feature_id);
      while ($afp = db_fetch_object($interpro_results)) {
        $hits[] = $afp->afpvalue;
      }

      $analyses[] = array(
        'aid' => $ana->aid,
        'name' => $details ? $details->name : '',
        'time' => $details ? $details->time : '',
        'hits' => $hits,
      );
    }

    // ---- Part 2: back on the drupal database, render the box ----
    // The node lookup and url() below are done with the drupal database
    // active, as in the original per-iteration switch.
    tripal_db_set_active($previous_db);

    if (!$analyses) {
      return $content;
    }

    $content .= "<div id=\"interpro-hits\" class=\"tripal_interpro-info-box\">\n"
              . "<div class=\"tripal_expandableBox\">\n"
              . "<h3>InterProScan Analysis</h3>\n"
              . "</div>\n"
              . "<div class=\"tripal_expandableBoxContent\">\n"
              . "<span>\n"
              . "<table class=\"tripal_interpro_results_table\">\n"
              . "<tr><td>";

    foreach ($analyses as $analysis) {
      // Find node id for the analysis.
      $ana_nid = db_result(db_query("SELECT nid FROM {chado_analysis} WHERE analysis_id = %d", $analysis['aid']));

      // Quote the href and escape both the URL and the analysis name so that
      // stored text cannot break out of the markup.
      $ana_url = htmlspecialchars(url("node/" . $ana_nid), ENT_QUOTES, 'UTF-8');
      $ana_name = htmlspecialchars($analysis['name'], ENT_QUOTES, 'UTF-8');

      $content .= "<strong>Analysis Date:</strong> " . $analysis['time'] . "\n"
                . " (<a href=\"" . $ana_url . "\">" . $ana_name . "</a>)";

      // Stored values are emitted verbatim (presumably pre-rendered markup
      // written by the importer -- confirm).
      $content .= implode('', $analysis['hits']);
    }

    $content .= '</td></tr></table></span></div></div>';
    return $content;
  }
  /*******************************************************************************
   * tripal_get_interpro_results_index_version()
   * Get the stored interpro output for a feature as one plain concatenated
   * string, without the surrounding results box.
   *
   * @param $feature_id
   *   The chado feature_id to look up.
   * @return
   *   The concatenated analysisfeatureprop values of type
   *   'analysis_interpro_output_hit' for the feature, or an empty string when
   *   there are none.
   */
  function tripal_get_interpro_results_index_version($feature_id){
    $content = '';

    // cvterm_id of 'analysis_interpro_output_hit', used to select the
    // interpro output rows in analysisfeatureprop.
    $previous_db = tripal_db_set_active('chado');
    $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
           "INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
           "WHERE CVT.name = 'analysis_interpro_output_hit' ".
           "AND CV.name = 'tripal'";
    $type_id = db_result(db_query($sql));

    // Get the stored output from the analysisfeatureprop value column.
    // NOTE(review): the analysisprop join selects nothing; it repeats each
    // value once per analysisprop row and drops analyses that have none --
    // confirm whether that is intended.
    $sql = "SELECT AFP.value AS afpvalue FROM {analysisfeatureprop} AFP ".
           "INNER JOIN analysisfeature AF ON AF.analysisfeature_id = AFP.analysisfeature_id ".
           "INNER JOIN analysisprop AP ON AP.analysis_id = AF.analysis_id ".
           "WHERE feature_id = %d ".
           "AND AFP.type_id = %d ";
    $result = db_query($sql, $feature_id, $type_id);

    // Iterate the result set directly. Probing it with db_result() first
    // fetches (and so discards) the first row, which would drop the first
    // hit from the output.
    while ($analysisfeatureprop = db_fetch_object($result)) {
      $content .= $analysisfeatureprop->afpvalue;
    }

    tripal_db_set_active($previous_db);
    return $content;
  }
  /*******************************************************************************
   * Tripal Interpro administrative setting form. This function is called by
   * tripal_analysis module which asks for an admin form to show on the page
   *
   * @return
   *   An object with two properties: ->form (the form array) and ->title
   *   (the section title).
   */
  function tripal_analysis_interpro_get_settings() {
    // 'chado_feature' is the only content type offered for showing interpro
    // results.
    $allowedoptions = array(
      'chado_feature' => "Show Interpro results on feature pages",
    );

    $form = array();
    $form['description'] = array(
      '#type' => 'item',
      '#value' => t("Some chado features were analyzed using InterProScan. This option allows user to display the interpro analysis results. Please read user manual for storage and display of interpro files. Check the box to enable the analysis results. Uncheck to disable it."),
      '#weight' => 0,
    );
    $form['tripal_analysis_interpro_setting'] = array(
      '#type' => 'checkboxes',
      '#options' => $allowedoptions,
      '#default_value' => variable_get('tripal_analysis_interpro_setting',
        array('chado_feature')),
    );

    // Create the container explicitly: assigning a property to an undefined
    // variable throws an Error on PHP 8.
    $settings = new stdClass();
    $settings->form = $form;
    $settings->title = "Tripal Interpro";
    return $settings;
  }
  /*******************************************************************************
   * HOOK: Implementation of hook_perm()
   *
   * Lists the permissions this module defines: one each for accessing,
   * creating, deleting and editing chado_analysis_interpro content.
   *
   * @return
   *   An array of permission strings.
   */
  function tripal_analysis_interpro_perm(){
    $permissions = array();
    $permissions[] = 'access chado_analysis_interpro content';
    $permissions[] = 'create chado_analysis_interpro content';
    $permissions[] = 'delete chado_analysis_interpro content';
    $permissions[] = 'edit chado_analysis_interpro content';
    return $permissions;
  }
  /*******************************************************************************
   * The following function provides access control for users trying to
   * perform actions on data managed by this module.
   *
   * @param $op
   *   The operation: 'create', 'update', 'delete' or 'view'.
   * @param $node
   *   The node the operation applies to; not consulted here.
   * @param $account
   *   The user account whose permissions are checked.
   * @return
   *   For 'create', the result of user_access(). For the other three
   *   operations, TRUE when the account holds the matching permission.
   *   FALSE in every other case.
   */
  function chado_analysis_interpro_access($op, $node, $account){
    // 'create' hands back the user_access() result as is.
    if ($op == 'create') {
      return user_access('create chado_analysis_interpro content', $account);
    }

    // Remaining operations and the permission each one requires.
    $required = array(
      'update' => 'edit chado_analysis_interpro content',
      'delete' => 'delete chado_analysis_interpro content',
      'view' => 'access chado_analysis_interpro content',
    );
    foreach ($required as $operation => $permission) {
      if ($op == $operation && user_access($permission, $account)) {
        return TRUE;
      }
    }

    return FALSE;
  }