tripal_analysis_blast.module 60 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584
  1. <?php
  2. /*******************************************************************************
  3. * Tripal Blast Result lets users show/hide blast results associated
  4. * with a tripal feature
  5. ******************************************************************************/
  6. function tripal_analysis_blast_init(){
  7. // Add javascript and style sheet
  8. drupal_add_css(drupal_get_path('theme', 'tripal').'/css/tripal_analysis_blast.css');
  9. drupal_add_js(drupal_get_path('theme', 'tripal').'/js/tripal_analysis_blast.js');
  10. }
  11. /*******************************************************************************
  12. * tripal_analysis_blast_menu()
  13. * HOOK: Implementation of hook_menu()
  14. * Entry points and paths of the module
  15. */
  16. function tripal_analysis_blast_menu() {
  17. // Show top 10/25/all blast results for ajax calls
  18. $items['tripal_top_blast'] = array(
  19. 'path' => 'top_blast',
  20. 'title' => t('Blast Hits'),
  21. 'page callback' => 'tripal_get_feature_blast_results_ajax',
  22. 'page arguments' => array(1,2,3),
  23. 'access arguments' => array('access content'),
  24. 'type' => MENU_CALLBACK
  25. );
  26. // Show regular expressions for selected database in Blast admin page
  27. $items['admin/tripal/tripal_blast_regex'] = array(
  28. 'title' => t('Blast Regex'),
  29. 'page callback' => 'tripal_get_blast_regex',
  30. 'page arguments' => array(3),
  31. 'access arguments' => array('administer site configuration'),
  32. 'type' => MENU_CALLBACK
  33. );
  34. return $items;
  35. }
  36. /*******************************************************************************
  37. * tripal_analysis_blast_nodeapi()
  38. * HOOK: Implementation of hook_nodeapi()
  39. * Display blast results for allowed node types
  40. */
  41. function tripal_analysis_blast_nodeapi(&$node, $op, $teaser, $page) {
  42. switch ($op) {
  43. case 'view':
  44. // Find out which node types for showing the blast
  45. $types_to_show = variable_get('tripal_analysis_blast_setting',
  46. array('chado_feature'));
  47. // Abort if this node is not one of the types we should show.
  48. if (!in_array($node->type, $types_to_show, TRUE)) {
  49. break;
  50. }
  51. // Add blast to the content item if it's not a teaser
  52. if (!$teaser && $node->feature->feature_id) {
  53. if($node->build_mode == NODE_BUILD_SEARCH_INDEX){
  54. $node->content['tripal_analysis_blast_index_version'] = array(
  55. '#value' => theme('tripal_analysis_blast_results_index_version',$node),
  56. '#weight' => 8,
  57. );
  58. } else {
  59. if(strcmp($node->type,'chado_feature')==0){
  60. // Show blast result if not at teaser view
  61. $node->content['tripal_feature_blast_results'] = array(
  62. '#value' => theme('tripal_feature_blast_results', $node),
  63. '#weight' => 8
  64. );
  65. }
  66. }
  67. }
  68. }
  69. }
  70. /************************************************************************
  71. * We need to let drupal know about our theme functions and their arguments.
  72. * We create theme functions to allow users of the module to customize the
  73. * look and feel of the output generated in this module
  74. */
  75. function tripal_analysis_blast_theme () {
  76. return array(
  77. 'tripal_analysis_blast_results_index_version' => array (
  78. 'arguments' => array('node'),
  79. ),
  80. 'tripal_feature_blast_results' => array(
  81. 'arguments' => array('node'=> null),
  82. 'template' => 'tripal_feature_blast_results',
  83. )
  84. );
  85. }
  86. /*******************************************************************************
  87. *
  88. */
  89. function tripal_get_feature_blast_results_ajax($feature_id, $db_id, $max){
  90. $sql = "SELECT nid FROM {chado_feature} WHERE feature_id = %d";
  91. $nid = db_fetch_object(db_query($sql,$feature_id));
  92. $node = node_load($nid->nid);
  93. // add the additional variables that the theme needs to generate the output
  94. $node->db_id = $db_id;
  95. $node->max = $max;
  96. // call the theme to rebuild the blast results
  97. drupal_json(array('update' => theme('tripal_feature_blast_results',$node)));
  98. }
  99. /*******************************************************************************
  100. *
  101. */
  102. function tripal_analysis_blast_preprocess_tripal_feature_blast_results(&$variables){
  103. $feature = $variables['node']->feature;
  104. $db_id = $variables['node']->db_id;
  105. $max = 10;
  106. if(isset($variables['node']->max)){
  107. $max = $variables['node']->max;
  108. }
  109. $blast_results = tripal_get_feature_blast_results($feature->feature_id, $db_id, $max);
  110. $variables['tripal_analysis_blast']['blast_results_list'] = $blast_results;
  111. }
  112. /*******************************************************************************
  113. * Prepare blast result for the feature shown on the page
  114. */
  115. function theme_tripal_analysis_blast_results_index_version ($node) {
  116. $feature = $node->feature;
  117. $content = tripal_get_blast_results_index_version($feature->feature_id);
  118. return $content;
  119. }
  120. /*******************************************************************************
  121. * tripal_get_feature_blast_results()
  122. * Get blast result from featureprop table for the feature
  123. */
  124. function tripal_get_feature_blast_results($feature_id, $db_id, $max){
  125. // Get the blast results stored as XML from the analysisfeatureprop table
  126. // the type for the property is named 'analysis_blast_output_iteration_hits'
  127. // and is found in the 'tripal' controlled vocabulary. This CV term was
  128. // added by this module.
  129. $sql = "SELECT AP.value AS apvalue, AFP.value AS afpvalue, AF.analysis_id AS aid
  130. FROM {analysisfeatureprop} AFP
  131. INNER JOIN {analysisfeature} AF ON AF.analysisfeature_id = AFP.analysisfeature_id
  132. INNER JOIN {analysisprop} AP ON AP.analysis_id = AF.analysis_id
  133. INNER JOIN {cvterm} CVT on AFP.type_id = CVT.cvterm_id
  134. INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
  135. WHERE AF.feature_id = %d AND CV.name = '%s' AND
  136. CVT.name = '%s' AND AP.value like '%|%' ";
  137. $result = db_query($sql, $feature_id,'tripal','analysis_blast_output_iteration_hits');
  138. tripal_db_set_active($previous_db);
  139. // get the HTML content for viewing each of the XML file
  140. $blast_obj_array = array ();
  141. $blast_obj_counter = 0;
  142. while ($analysisfeatureprop = db_fetch_object($result)) {
  143. // the database db_id is stored in the value field of the table
  144. // along with the XML. The two are separated by a bar. We
  145. // will separate these two:
  146. $blastsettings = explode("|", $analysisfeatureprop->apvalue);
  147. // if we don't have the proper number of fields in the value column then
  148. // skip this entry
  149. if(count($blastsettings) != 3){
  150. continue;
  151. }
  152. if(!$blastsettings[0]){
  153. continue;
  154. }
  155. $att_db_id = $blastsettings[0];
  156. // get analysis name and date
  157. $previous_db = tripal_db_set_active('chado');
  158. $sql = "SELECT analysis_id AS aid, name, to_char(timeexecuted, 'MM-DD-YYYY') AS time
  159. FROM {analysis}
  160. WHERE analysis_id = %d";
  161. $analysis = db_fetch_object(db_query($sql, $analysisfeatureprop->aid));
  162. tripal_db_set_active($previous_db);
  163. // Get db object using the db_id
  164. $previous_db = tripal_db_set_active('chado');
  165. $sql = "SELECT * FROM {db} WHERE db_id=%d";
  166. $db = db_fetch_object(db_query($sql, $att_db_id));
  167. tripal_db_set_active($previous_db);
  168. // we want to add this blast result to our list if a database id has
  169. // not been specified or if it has been specified and the database id
  170. // for this analysis matches that of the one requested
  171. if(!$db_id or ($db_id and $att_db_id == $db_id)) {
  172. $blast_obj = tripal_analysis_blast_get_result_object($analysisfeatureprop->afpvalue,$db,$max,$feature_id, $analysis);
  173. $blast_obj->analysis = $analysis;
  174. $blast_obj_array [$blast_obj_counter] = $blast_obj;
  175. $blast_obj_counter ++;
  176. }
  177. }
  178. return $blast_obj_array;
  179. }
  180. /*******************************************************************************
  181. * Scanning the file folder for blast results and prepare content for indexing
  182. */
  183. function tripal_get_blast_results_index_version ($feature_id){
  184. // Get cvterm_id for 'analysis_blast_output_iteration_hits' which is required
  185. // for inserting into the analysisfeatureprop table
  186. $previous_db = tripal_db_set_active('chado');
  187. $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
  188. "INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
  189. "WHERE CVT.name = 'analysis_blast_output_iteration_hits' ".
  190. "AND CV.name = 'tripal'";
  191. $type_id = db_result(db_query($sql));
  192. // Get xml string from analysisfeatureprop value column, get db_id from analysisprop value column
  193. // , and get analysis_id from analysisfeature table
  194. $sql = "SELECT AP.value AS apvalue, AFP.value AS afpvalue, AF.analysis_id AS aid
  195. FROM {analysisfeatureprop} AFP
  196. INNER JOIN analysisfeature AF ON AF.analysisfeature_id = AFP.analysisfeature_id
  197. INNER JOIN analysisprop AP ON AP.analysis_id = AF.analysis_id
  198. WHERE feature_id = %d
  199. AND AFP.type_id = %d ";
  200. $result = db_query($sql, $feature_id, $type_id);
  201. tripal_db_set_active($previous_db);
  202. // get the HTML content for viewing each of the XML file
  203. while ($analysisfeatureprop = db_fetch_object($result)) {
  204. // get analysis name and date
  205. $previous_db = tripal_db_set_active('chado');
  206. $sql = "SELECT analysis_id AS aid, name, to_char(timeexecuted, 'MM-DD-YYYY') AS time
  207. FROM {analysis} WHERE analysis_id = %d";
  208. $analysis = db_fetch_object(db_query($sql, $analysisfeatureprop->aid));
  209. tripal_db_set_active($previous_db);
  210. $blastsettings = explode("|", $analysisfeatureprop->apvalue);
  211. $att_db_id = $blastsettings [0];
  212. // Get db object using the db_id
  213. $previous_db = tripal_db_set_active('chado');
  214. $sql = "SELECT * FROM {db} WHERE db_id=%d";
  215. $db = db_fetch_object(db_query($sql, $att_db_id));
  216. tripal_db_set_active($previous_db);
  217. // Only index best 10 hits because the default page only shows 10 blast results
  218. $max = 10;
  219. $content .= parse_NCBI_Blast_XML_index_version($analysisfeatureprop->afpvalue,$db,$max,$feature_id,$ajax, $analysis);
  220. }
  221. return $content;
  222. }
  223. /*******************************************************************************
  224. * Parse NCBI Blast results for indexing so that user can use blast results to
  225. * find corresponding features
  226. */
  227. function parse_NCBI_Blast_XML_index_version($xml_string,$db,$feature_id) {
  228. // Get the parser using db_id
  229. $sql = "SELECT * FROM {tripal_analysis_blast} WHERE db_id = %d";
  230. $parser = db_fetch_object(db_query($sql, $db->db_id));
  231. $db_name = $parser->displayname;
  232. $is_genbank = $parser->genbank_style;
  233. $regex_hit_id = $parser->regex_hit_id;
  234. $regex_hit_def = $parser->regex_hit_def;
  235. $regex_hit_accession = $parser->regex_hit_accession;
  236. // set default if regular expressions have not been specified
  237. if(!$regex_hit_id){
  238. $regex_hit_id = '/^(.*?)\s.*$/';
  239. } else {
  240. $regex_hit_id = '/'.$regex_hit_id.'/';
  241. }
  242. if(!$regex_hit_def){
  243. $regex_hit_def = '/^.*?\s(.*)$/';
  244. } else {
  245. $regex_hit_def = '/'.$regex_hit_def.'/';
  246. }
  247. if(!$regex_hit_accession){
  248. $regex_hit_accession = '/^(.*?)\s.*$/';
  249. } else {
  250. $regex_hit_accession = '/'.$regex_hit_accession.'/';
  251. }
  252. $html_out .= "<h3>$db_name</h3>";
  253. // Load the file. This XML file should be an extract
  254. // of the original XML file with only a single iteration.
  255. // An iteration is essentially all the hits for a single
  256. // query sequence.
  257. $xml_output = simplexml_load_string($xml_string);
  258. $iteration = '';
  259. // new XML file parser has added the feature name within <Iteration_query-def> tags.
  260. if ($xml_output->getName() == 'Iteration') {
  261. foreach ($xml_output->children() as $xml_tag) {
  262. if ($xml_tag->getName() == 'Iteration_query-def') {
  263. // Here we show the feature name again to check if we pull the correct data
  264. $html_out .= "Query: $xml_tag<br>";
  265. } else if ($xml_tag->getName() == 'Iteration_hits') {
  266. $iteration = $xml_tag;
  267. }
  268. }
  269. // This is for the file parsed by the old parser
  270. } else {
  271. $iteration = $xml_output;
  272. }
  273. // now run through the blast hits/hsps of this iteration
  274. // and generate the rows of the table
  275. foreach($iteration->children() as $hits){
  276. $best_evalue = 0;
  277. foreach($hits->children() as $hit){
  278. $best_evalue = 0;
  279. $element_name = $hit->getName();
  280. if($element_name == 'Hit_id'){
  281. // if parsing "name, acc, desc" from three tags (1/3)
  282. if ($is_genbank) {
  283. $hit_name = $hit;
  284. }
  285. } else if($element_name == 'Hit_def'){
  286. if($is_genbank){
  287. $description = $hit;
  288. } else {
  289. $accession = preg_replace($regex_hit_accession,"$1",$hit);
  290. $hit_name = preg_replace($regex_hit_id,"$1",$hit);
  291. $description = preg_replace($regex_hit_def,"$1",$hit);
  292. }
  293. } else if($element_name == 'Hit_accession'){
  294. // if parsing "name, acc, desc" from three tags (3/3)
  295. if ($is_genbank){
  296. $accession = $hit;
  297. }
  298. // now run through each HSP for this hit
  299. }
  300. }
  301. $html_out .= "<p>$hit_name<br>";
  302. $html_out .= "$accession<br>";
  303. $html_out .= "<b>$description</b></br>";
  304. $hsp_html_out = '';
  305. }
  306. return $html_out;
  307. }
  308. /*******************************************************************************
  309. * Tripal Blast administrative setting form. This function is called by
  310. * tripal_analysis module which asks for an admin form to show on the page
  311. */
  312. function tripal_analysis_blast_get_settings() {
  313. // Get an array of node types with internal names as keys
  314. $options = node_get_types('names');
  315. // Add 'chado_feature' to allowed content types for showing blast results
  316. $allowedoptions ['chado_feature'] = "Show blast results on feature pages";
  317. $form['description'] = array(
  318. '#type' => 'item',
  319. '#value' => t("Most chado features were analyzed by blast against major sequence databases. This option allows user to display the blast analysis results. Please read user manual for storage and display of blast files. Check the box to enable the analysis results. Uncheck to disable it."),
  320. '#weight' => 0,
  321. );
  322. $form['tripal_analysis_blast_setting'] = array(
  323. '#type' => 'checkboxes',
  324. '#options' => $allowedoptions,
  325. '#default_value' => variable_get('tripal_analysis_blast_setting',
  326. array('chado_feature')),
  327. );
  328. $form['blast_parser'] = array(
  329. '#title' => t('Blast Parser Settings'),
  330. '#type' => 'fieldset',
  331. '#description' => t('Configure parsers for showing blast results. Each database is '.
  332. 'allowed to have one xml parser.'),
  333. '#weight' => 10
  334. );
  335. $previous_db = tripal_db_set_active('chado'); // use chado database
  336. // get a list of db from chado for user to choose
  337. $sql = 'SELECT db_id, name FROM {db} ORDER BY lower(name)';
  338. $results = db_query ($sql);
  339. $blastdbs = array();
  340. while ($db = db_fetch_object($results)){
  341. $blastdbs[$db->db_id] = $db->name;
  342. }
  343. $form['db_options'] = array(
  344. '#type' => 'value',
  345. '#value' => $blastdbs
  346. );
  347. $form['blast_parser']['blastdb'] = array(
  348. '#title' => t('Database'),
  349. '#type' => 'select',
  350. '#description' => t('The database used for the blast analysis.'),
  351. '#options' => $form['db_options']['#value'],
  352. '#attributes' => array(
  353. 'onChange' => "return tripal_update_regex(this)",
  354. )
  355. );
  356. $form['blast_parser']['displayname'] = array(
  357. '#title' => t('Title for the blast analysis'),
  358. '#type' => 'textfield',
  359. );
  360. $form['blast_parser']['gb_style_parser'] = array(
  361. '#title' => t('Use Genebank style parser. This will clear all regular expression settings for the selected database.'),
  362. '#type' => 'checkbox',
  363. '#attributes' => array(
  364. 'onClick' => "return tripal_set_genbank_style(this)",
  365. )
  366. );
  367. $form['blast_parser']['hit_id'] = array(
  368. '#title' => t('Regular expression for Hit Name'),
  369. '#type' => 'textfield',
  370. );
  371. $form['blast_parser']['hit_def'] = array(
  372. '#title' => t('Regular expression for Hit Description'),
  373. '#type' => 'textfield',
  374. );
  375. $form['blast_parser']['hit_accession'] = array(
  376. '#title' => t('Regular expression for Hit Accession'),
  377. '#type' => 'textfield',
  378. );
  379. $form['blast_parser']['button'] = array(
  380. '#type' => 'submit',
  381. '#value' => t('Save settings')
  382. );
  383. tripal_db_set_active($previous_db); // use drupal database
  384. $settings->form = $form;
  385. $settings->title = "Tripal Blast";
  386. return $settings;
  387. }
  388. /*******************************************************************************
  389. * Parse Blast XML Output file into analysisfeatureprop table
  390. */
  391. function tripal_analysis_blast_parseXMLFile ($analysis_id, $blastdb, $blastfile, $job_id) {
  392. // Prepare log
  393. $filename = preg_replace("/.*\/(.*)/", "$1", $blastfile);
  394. $logfile = file_directory_path() . "/tripal/tripal_analysis_blast/load_$filename.log";
  395. $log = fopen($logfile, 'a'); // append parsing results to log file
  396. // If user input a file (e.g. blast.xml)
  397. if (is_file($blastfile)) {
  398. // Parsing started
  399. print "Parsing File:".$blastfile." ...\n";
  400. fwrite($log, date("D M j G:i:s Y").". Loading $blastfile\n");
  401. // Get cvterm_id for 'analysis_blast_output_iteration_hits' which is required
  402. // for inserting into the analysisfeatureprop table
  403. $previous_db = tripal_db_set_active('chado'); // use chado database
  404. $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
  405. "INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
  406. "WHERE CVT.name = 'analysis_blast_output_iteration_hits' ".
  407. "AND CV.name = 'tripal'";
  408. $type_id = db_result(db_query($sql));
  409. // Load the XML file.
  410. $blastoutput = simplexml_load_file($blastfile);
  411. $no_iterations = 0;
  412. foreach($blastoutput->children() as $tmp) {
  413. if ($tmp->getName() == 'BlastOutput_iterations') {
  414. foreach($tmp->children() as $itr) {
  415. if ($itr->getName() == 'Iteration') {
  416. $no_iterations ++;
  417. }
  418. }
  419. }
  420. }
  421. print "$no_iterations iterations to be processed.\n";
  422. $interval = intval($no_iterations * 0.01);
  423. $idx_iterations = 0;
  424. foreach ($blastoutput->children() as $blastoutput_tags) {
  425. if ($blastoutput_tags->getName() == 'BlastOutput_iterations') {
  426. foreach($blastoutput_tags->children() as $iterations) {
  427. if ($iterations->getName() == 'Iteration') {
  428. // Set job status
  429. $idx_iterations ++;
  430. if ($idx_iterations % $interval == 0) {
  431. $percentage = (int) ($idx_iterations / $no_iterations * 100);
  432. tripal_db_set_active($previous_db);
  433. tripal_job_set_progress($job_id, $percentage);
  434. $previous_db = tripal_db_set_active('chado');
  435. print $percentage."% ";
  436. }
  437. // now run through the blast hits/hsps of this iteration
  438. // and generate the rows of the table
  439. $feature_id = 0;
  440. foreach($iterations->children() as $iteration_tags) {
  441. // Match chado feature uniquename with <Iteration_query-def>
  442. // and get the feature_id
  443. $featurenaem_xml = '';
  444. if($iteration_tags->getName() == 'Iteration_query-def'){
  445. // If the Iteration_query-def in the format of "feature_id|uniquename"
  446. // get feature_id from it directly
  447. if (preg_match("/^(\d+)\|.+/", $iteration_tags, $matches)) {
  448. $feature_id = $matches[1];
  449. // If not in above format, try to match <Iteration_query-def> to feature's uniquename
  450. } else {
  451. // treat the first word of <Iteration_query-def> as uniquename
  452. $first_word = $iteration_tags;
  453. if (preg_match('/^(.*?)\s.*$/', $iteration_tags, $matches)) {
  454. $first_word = $matches[1];
  455. }
  456. // Find out how many features match this uniquename
  457. $sql = "SELECT count(feature_id) FROM {feature} ".
  458. "WHERE uniquename = '%s' ";
  459. $no_features = db_result(db_query($sql, $first_word));
  460. // If there is only one match, get the feature_id
  461. if ($no_features == 1) {
  462. $sql = "SELECT feature_id FROM {feature} ".
  463. "WHERE uniquename = '%s' ";
  464. $feature_id = db_result(db_query($sql, $first_word));
  465. // If the uniquename matches more than one features then skip and print 'Ambiguous'
  466. } else if ($no_features > 1) {
  467. fwrite($log, "Ambiguous: ".$first_word." matches more than one feature and is not processed.\n");
  468. continue;
  469. // If the uniquename did not match, skip and print 'Failed'
  470. } else {
  471. fwrite($log, "Failed: ".$first_word."\n");
  472. }
  473. }
  474. // Successfully matched. print 'Succeeded'
  475. if ($feature_id) {
  476. fwrite($log, "Succeeded: ".$first_word." => feature id:".$feature_id);
  477. $featurename_xml = $iteration_tags->asXML();
  478. }
  479. // Insert Iteration_hits into analysisfeatureprop and analysisfeature tables
  480. } else if($iteration_tags->getName() == 'Iteration_hits'){
  481. if ($feature_id) {
  482. // Make sure this iteration doesn't exist in analysisfeatureprop. If it does, update but not insert
  483. $sql = "SELECT analysisfeatureprop_id FROM {analysisfeatureprop} AFP ".
  484. "INNER JOIN analysisfeature AF ON AF.analysisfeature_id = AFP.analysisfeature_id ".
  485. "WHERE feature_id=%d ".
  486. "AND analysis_id=%d ".
  487. "AND type_id=%d ";
  488. $result = db_query($sql, $feature_id, $analysis_id, $type_id);
  489. $analysisfeatureprop = db_fetch_object($result);
  490. $xml_content = "<Iteration>\n".$featurename_xml."\n".$iteration_tags->asXML()."\n</Iteration>";
  491. // If this Iteration_hits already exists, update it
  492. if ($analysisfeatureprop) {
  493. $sql = "UPDATE {analysisfeatureprop} ".
  494. "SET value = '%s' ".
  495. "WHERE analysisfeatureprop_id = %d ";
  496. db_query($sql, $xml_content, $analysisfeatureprop->analysisfeatureprop_id);
  497. fwrite($log, " (Update)\n"); // write to log
  498. // Otherwise, insert the Iteration_hits into analysisfeature and analysisfeatureprop tables
  499. } else {
  500. //------------------------------------------------------
  501. // Insert into analysisfeature table
  502. //------------------------------------------------------
  503. $sql = "INSERT INTO {analysisfeature} (feature_id, analysis_id) ".
  504. "VALUES (%d, %d)";
  505. db_query ($sql, $feature_id, $analysis_id);
  506. // Get the newly inserted analysisfeature_id
  507. $sql = "SELECT analysisfeature_id FROM {analysisfeature} WHERE feature_id = %d AND analysis_id = %d";
  508. $analysisfeature_id = db_result(db_query($sql, $feature_id, $analysis_id));
  509. //------------------------------------------------------
  510. // Insert into analysisfeatureprop table
  511. //------------------------------------------------------
  512. $sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank)".
  513. "VALUES (%d, %d, '%s', %d)";
  514. db_query($sql, $analysisfeature_id, $type_id, $xml_content, '0');
  515. fwrite($log, " (Insert)\n"); // write to log
  516. }
  517. }
  518. }
  519. }
  520. }
  521. }
  522. }
  523. }
  524. tripal_db_set_active ($previous_db); // Use drupal database
  525. // Otherwise, $blastfile is a directory. Iterate through all xml files in it
  526. } else {
  527. $dir_handle = @opendir($blastfile) or die("Unable to open $blastfile");
  528. $pattern = sql_regcase($blastfile . "/*.XML");
  529. $total_files = count(glob($pattern));
  530. print "$total_files file(s) to be parsed.\n";
  531. $interval = intval($total_files * 0.01);
  532. $no_file = 0;
  533. // Get cvterm_id for 'analysis_blast_output_iteration_hits' which is required
  534. // for inserting into the analysisfeatureprop table
  535. $previous_db = tripal_db_set_active('chado'); // use chado database
  536. $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
  537. "INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
  538. "WHERE CVT.name = 'analysis_blast_output_iteration_hits' ".
  539. "AND CV.name = 'tripal'";
  540. $type_id = db_result(db_query($sql));
  541. // Parsing all files in the directory
  542. while ($file = readdir($dir_handle)) {
  543. if(preg_match("/^.*\.XML/i",$file)){
  544. // Set job status
  545. if ($no_file % $interval == 0) {
  546. $percentage = (int) ($no_file / $total_files * 100);
  547. tripal_db_set_active($previous_db);
  548. tripal_job_set_progress($job_id, $percentage);
  549. $previous_db = tripal_db_set_active('chado');
  550. print $percentage."% ";
  551. }
  552. // Parsing started
  553. print "Parsing File:".$file.". ";
  554. fwrite($log, date("D M j G:i:s Y").". Loading $file\n");
  555. // Load the XML file.
  556. $blastoutput = simplexml_load_file($blastfile."/".$file);
  557. $no_iterations = 0;
  558. foreach($blastoutput->children() as $tmp) {
  559. if ($tmp->getName() == 'BlastOutput_iterations') {
  560. foreach($tmp->children() as $itr) {
  561. if ($itr->getName() == 'Iteration') {
  562. $no_iterations ++;
  563. }
  564. }
  565. }
  566. }
  567. print "$no_iterations iterations to be processed.\n";
  568. foreach ($blastoutput->children() as $blastoutput_tags) {
  569. if ($blastoutput_tags->getName() == 'BlastOutput_iterations') {
  570. foreach($blastoutput_tags->children() as $iterations) {
  571. if ($iterations->getName() == 'Iteration') {
  572. // now run through the blast hits/hsps of this iteration
  573. // and generate the rows of the table
  574. $feature_id = 0;
  575. foreach($iterations->children() as $iteration_tags) {
  576. // Match chado feature uniquename with <Iteration_query-def>
  577. // and get the feature_id
  578. $featurenaem_xml = '';
  579. if($iteration_tags->getName() == 'Iteration_query-def'){
  580. // If the Iteration_query-def in the format of "feature_id|uniquename"
  581. // get feature_id from it directly
  582. if (preg_match("/^(\d+)\|.+/", $iteration_tags, $matches)) {
  583. $feature_id = $matches[1];
  584. // If not in above format, try to match <Iteration_query-def> to feature's uniquename
  585. } else {
  586. // treat the first word of <Iteration_query-def> as uniquename
  587. $first_word = $iteration_tags;
  588. if (preg_match('/^(.*?)\s.*$/', $iteration_tags, $matches)) {
  589. $first_word = $matches[1];
  590. }
  591. // Find out how many features match this uniquename
  592. $sql = "SELECT count(feature_id) FROM {feature} ".
  593. "WHERE uniquename = '%s' ";
  594. $no_features = db_result(db_query($sql, $first_word));
  595. // If there is only one match, get the feature_id
  596. if ($no_features == 1) {
  597. $sql = "SELECT feature_id FROM {feature} ".
  598. "WHERE uniquename = '%s' ";
  599. $feature_id = db_result(db_query($sql, $first_word));
  600. // If the uniquename matches more than one features then skip and print 'Ambiguous'
  601. } else if ($no_features > 1) {
  602. fwrite($log, "Ambiguous: ".$first_word." matches more than one feature and is not processed.\n");
  603. continue;
  604. // If the uniquename did not match, skip and print 'Failed'
  605. } else {
  606. fwrite($log, "Failed: ".$first_word."\n");
  607. }
  608. }
  609. // Successfully matched. print 'Succeeded'
  610. if ($feature_id) {
  611. fwrite($log, "Succeeded: ".$first_word." => feature id:".$feature_id);
  612. $featurename_xml = $iteration_tags->asXML();
  613. }
  614. // Insert Iteration_hits into analysisfeatureprop and analysisfeature tables
  615. } else if($iteration_tags->getName() == 'Iteration_hits'){
  616. if ($feature_id) {
  617. // Make sure this iteration doesn't exist in analysisfeatureprop. If it does, update but not insert
  618. $sql = "SELECT analysisfeatureprop_id FROM {analysisfeatureprop} AFP ".
  619. "INNER JOIN analysisfeature AF ON AF.analysisfeature_id = AFP.analysisfeature_id ".
  620. "WHERE feature_id=%d ".
  621. "AND analysis_id=%d ".
  622. "AND type_id=%d ";
  623. $result = db_query($sql, $feature_id, $analysis_id, $type_id);
  624. $analysisfeatureprop = db_fetch_object($result);
  625. $xml_content = "<Iteration>\n".$featurename_xml."\n".$iteration_tags->asXML()."\n</Iteration>";
  626. // If this Iteration_hits already exists, update it
  627. if ($analysisfeatureprop) {
  628. $sql = "UPDATE {analysisfeatureprop} ".
  629. "SET value = '%s' ".
  630. "WHERE analysisfeatureprop_id = %d ";
  631. db_query($sql, $xml_content, $analysisfeatureprop->analysisfeatureprop_id);
  632. fwrite($log, " (Update)\n"); // write to log
  633. // Otherwise, insert the Iteration_hits into analysisfeature and analysisfeatureprop tables
  634. } else {
  635. //------------------------------------------------------
  636. // Insert into analysisfeature table
  637. //------------------------------------------------------
  638. $sql = "INSERT INTO {analysisfeature} (feature_id, analysis_id) ".
  639. "VALUES (%d, %d)";
  640. db_query ($sql, $feature_id, $analysis_id);
  641. // Get the newly inserted analysisfeature_id
  642. $sql = "SELECT analysisfeature_id FROM {analysisfeature} WHERE feature_id = %d AND analysis_id = %d";
  643. $analysisfeature_id = db_result(db_query($sql, $feature_id, $analysis_id));
  644. //------------------------------------------------------
  645. // Insert into analysisfeatureprop table
  646. //------------------------------------------------------
  647. $sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank)".
  648. "VALUES (%d, %d, '%s', %d)";
  649. db_query($sql, $analysisfeature_id, $type_id, $xml_content, '0');
  650. fwrite($log, " (Insert)\n"); // write to log
  651. }
  652. }
  653. }
  654. }
  655. }
  656. }
  657. }
  658. }
  659. $no_file ++;
  660. }
  661. }
  662. tripal_db_set_active ($previous_db); // Use drupal database
  663. }
  664. print "Done.\nSuccessful and failed entries have been saved in the log file:\n $logfile\n";
  665. fwrite($log, "\n");
  666. fclose($log);
  667. return;
  668. }
  669. /*******************************************************************************
  670. * This function is only called by ajax to get regular expressions for blast
  671. * admin page
  672. */
  673. function tripal_get_blast_regex ($db_id) {
  674. $sql = "SELECT * FROM {tripal_analysis_blast} WHERE db_id = %d";
  675. $blast_regexs = db_fetch_object(db_query($sql, $db_id));
  676. drupal_json(
  677. array(
  678. 'name' => $blast_regexs->displayname,
  679. 'genbank_style' => $blast_regexs->genbank_style,
  680. 'reg1' => $blast_regexs->regex_hit_id,
  681. 'reg2' => $blast_regexs->regex_hit_def,
  682. 'reg3' => $blast_regexs->regex_hit_accession,
  683. )
  684. );
  685. }
  686. /*******************************************************************************
  687. * Provide information to drupal about the node types that we're creating
  688. * in this module
  689. */
  690. function tripal_analysis_blast_node_info() {
  691. $nodes = array();
  692. $nodes['chado_analysis_blast'] = array(
  693. 'name' => t('Analysis: Blast'),
  694. 'module' => 'chado_analysis_blast',
  695. 'description' => t('A blast analysis from the chado database'),
  696. 'has_title' => FALSE,
  697. 'title_label' => t('Analysis: Blast'),
  698. 'has_body' => FALSE,
  699. 'body_label' => t('Blast Analysis Description'),
  700. 'locked' => TRUE
  701. );
  702. return $nodes;
  703. }
  704. /*******************************************************************************
  705. * Provide a Blast Analysis form
  706. */
  707. function chado_analysis_blast_form ($node){
  708. //dprint_r($node);
  709. $type = node_get_types('type', $node);
  710. $form = array();
  711. $form['title']= array(
  712. '#type' => 'hidden',
  713. '#default_value' => $node->title,
  714. );
  715. $form['analysisname']= array(
  716. '#type' => 'textfield',
  717. '#title' => t('Analysis Name'),
  718. '#required' => FALSE,
  719. '#default_value' => $node->analysisname,
  720. '#weight' => 1
  721. );
  722. $form['program']= array(
  723. '#type' => 'textfield',
  724. '#title' => t('Program'),
  725. '#required' => TRUE,
  726. '#default_value' => $node->program,
  727. '#weight' => 2
  728. );
  729. $form['programversion']= array(
  730. '#type' => 'textfield',
  731. '#title' => t('Program Version'),
  732. '#required' => TRUE,
  733. '#default_value' => $node->programversion,
  734. '#weight' => 3
  735. );
  736. $form['algorithm']= array(
  737. '#type' => 'textfield',
  738. '#title' => t('Algorithm'),
  739. '#required' => FALSE,
  740. '#default_value' => $node->algorithm,
  741. '#weight' => 4
  742. );
  743. $form['sourcename']= array(
  744. '#type' => 'textfield',
  745. '#title' => t('Source Name'),
  746. '#required' => FALSE,
  747. '#default_value' => $node->sourcename,
  748. '#weight' => 5
  749. );
  750. $form['sourceversion']= array(
  751. '#type' => 'textfield',
  752. '#title' => t('Source Version'),
  753. '#required' => FALSE,
  754. '#default_value' => $node->sourceversion,
  755. '#weight' => 6
  756. );
  757. $form['sourceuri']= array(
  758. '#type' => 'textfield',
  759. '#title' => t('Source URI'),
  760. '#required' => FALSE,
  761. '#default_value' => $node->sourceuri,
  762. '#weight' => 7
  763. );
  764. // Get time saved in chado
  765. $default_time = $node->timeexecuted;
  766. $year = preg_replace("/^(\d+)-\d+-\d+ .*/", "$1", $default_time);
  767. $month = preg_replace("/^\d+-0?(\d+)-\d+ .*/", "$1", $default_time);
  768. $day = preg_replace("/^\d+-\d+-0?(\d+) .*/", "$1", $default_time);
  769. // If the time is not set, use current time
  770. if (!$default_time) {
  771. $default_time = time();
  772. $year = format_date($default_time, 'custom', 'Y');
  773. $month = format_date($default_time, 'custom', 'n');
  774. $day = format_date($default_time, 'custom', 'j');
  775. }
  776. $form['timeexecuted']= array(
  777. '#type' => 'date',
  778. '#title' => t('Time Executed'),
  779. '#required' => TRUE,
  780. '#default_value' => array(
  781. 'year' => $year,
  782. 'month' => $month,
  783. 'day' => $day,
  784. ),
  785. '#weight' => 8
  786. );
  787. $form['description']= array(
  788. '#type' => 'textarea',
  789. '#rows' => 15,
  790. '#title' => t('Description and/or Program Settings'),
  791. '#required' => FALSE,
  792. '#default_value' => check_plain($node->description),
  793. '#weight' => 9
  794. );
  795. // Blast specific settings
  796. if (preg_match("/.*\|.*\|.*/",$node->blastdb)) {
  797. $prop_values = explode("|", $node->blastdb);
  798. $node->blastdb = $prop_values[0];
  799. $node->blastfile = $prop_values[1];
  800. $node->blastparameters = $prop_values[2];
  801. }
  802. $form['blast'] = array(
  803. '#title' => t('Blast Settings'),
  804. '#type' => 'fieldset',
  805. '#description' => t('Specific Settings for Blast Analysis.'),
  806. '#collapsible' => TRUE,
  807. '#attributes' => array('id' => 'blast-extra-settings'),
  808. '#weight' => 11
  809. );
  810. $previous_db = tripal_db_set_active('chado'); // use chado database
  811. // get a list of db from chado for user to choose
  812. $sql = 'SELECT db_id, name FROM {db} ORDER BY lower(name)';
  813. $results = db_query ($sql);
  814. tripal_db_set_active($previous_db);
  815. $blastdbs = array();
  816. while ($db = db_fetch_object($results)){
  817. $blastdbs[$db->db_id] = $db->name;
  818. }
  819. $form['db_options'] = array(
  820. '#type' => 'value',
  821. '#value' => $blastdbs
  822. );
  823. $form['blast']['blastdb'] = array(
  824. '#title' => t('Database'),
  825. '#type' => 'select',
  826. '#description' => t('The database used for the blast analysis.'),
  827. '#options' => $form['db_options']['#value'],
  828. '#default_value' => $node->blastdb,
  829. );
  830. $form['blast']['blastfile'] = array(
  831. '#title' => t('Blast xml File: (if you input a directory without the tailing slash, all xml files in the directory will be loaded)'),
  832. '#type' => 'textfield',
  833. '#description' => t('The xml output file generated by blast in full path.'),
  834. '#default_value' => $node->blastfile,
  835. );
  836. $form['blast']['blastjob'] = array(
  837. '#type' => 'checkbox',
  838. '#title' => t('Submit a job to parse the xml output into analysisfeatureprop table'),
  839. '#description' => t('Note: features associated with the blast results must '.
  840. 'exist in chado before parsing the file. Otherwise, blast '.
  841. 'results that cannot be linked to a feature will be '.
  842. 'discarded. Also, Triapl Blast module needs to be enabled.'),
  843. '#default_value' => $node->blastjob
  844. );
  845. $form['blast']['blastparameters'] = array(
  846. '#title' => t('Parameters'),
  847. '#type' => 'textfield',
  848. '#description' => t('The parameters for running the blast analysis.'),
  849. '#default_value' => $node->blastparameters,
  850. );
  851. return $form;
  852. }
  853. function chado_analysis_blast_validate($node, &$form){
  854. ##dprint_r($node);
  855. // This validation is being used for three activities:
  856. // CASE A: Update a node that exists in both drupal and chado
  857. // CASE B: Synchronizing a node from chado to drupal
  858. // CASE C: Inserting a new node that exists in niether drupal nor chado
  859. // Only nodes being updated will have an nid already
  860. if($node->nid){
  861. //---------------------------------------------------
  862. // CASE A: We are validating a form for updating an existing node
  863. //---------------------------------------------------
  864. // TO DO: check that the new fields don't yield a non-unique primary key in chado
  865. }
  866. else{
  867. // To differentiate if we are syncing or creating a new analysis altogther, see if an
  868. // analysis_id already exists
  869. if($node->analysis_id){
  870. //---------------------------------------------------
  871. // CASE B: Synchronizing a node from chado to drupal
  872. //---------------------------------------------------
  873. }
  874. else{
  875. //---------------------------------------------------
  876. // CASE C: We are validating a form for inserting a new node
  877. //---------------------------------------------------
  878. // The primary key for the chado analysis table is
  879. // program, programversion, sourcename
  880. // Check to see if this analysis really is new -ie, it doesn't have the same
  881. // primary key as any other analysis
  882. $sql = "SELECT analysis_id ".
  883. "FROM {analysis} ".
  884. "WHERE program='%s'".
  885. "AND programversion='%s'".
  886. "AND sourcename='%s'";
  887. $previous_db = tripal_db_set_active('chado');
  888. $analysis_id = db_result(db_query($sql, $node->program, $node->programversion, $node->sourcename));
  889. tripal_db_set_active($previous_db);
  890. if($analysis_id){
  891. //---------------------------------------------------
  892. // this primary key already exists in chado analysis table!
  893. //---------------------------------------------------
  894. // check to see if it has also been synced with drupal
  895. $sql = "SELECT nid FROM {chado_analysis} ".
  896. "WHERE analysis_id = %d";
  897. $node_id = db_result(db_query($sql, $analysis_id));
  898. if($node_id){
  899. //---------------------------------------------------
  900. // the analysis has already been synced with drupal, redirect the user
  901. // to modify that node or start over
  902. //---------------------------------------------------
  903. $error = 'This analysis already exists in the chado database (analysis id ';
  904. $error .= $analysis_id.') and has been synchronized ';
  905. $error .= 'with drupal. See node '.$node_id.' if you wish to update that analysis. ';
  906. $error .= ' For a new analysis, please select a unique primary key ';
  907. $error .= '(primary key consists of sourcename, program and programversion).';
  908. form_set_error('sourcename', t($error));
  909. }
  910. else{
  911. //---------------------------------------------------
  912. // the analysis does not exist in drupal - tell the user
  913. // to sync from chado or create a new unique primary key
  914. //---------------------------------------------------
  915. $error = 'This analysis already exists in the chado database (analysis id ';
  916. $error .= $analysis_id.') but has not been synchronized ';
  917. $error .= 'with drupal. See the tripal admin pages to synchronize. ';
  918. $error .= ' For a new analysis, please select a unique primary key ';
  919. $error .= '(primary key consists of sourcename, program and programversion).';
  920. form_set_error('sourcename', t($error));
  921. }
  922. }
  923. }
  924. }
  925. }
  926. function chado_analysis_blast_insert($node){
  927. global $user;
  928. // Create a timestamp so we can insert it into the chado database
  929. $time = $node->timeexecuted;
  930. $month = $time['month'];
  931. $day = $time['day'];
  932. $year = $time['year'];
  933. $timestamp = $month.'/'.$day.'/'.$year;
  934. //---------------------------------------------------
  935. // First add the item to the chado analysis table
  936. //---------------------------------------------------
  937. $sql = "INSERT INTO {analysis} ".
  938. " (name, description, program, programversion, algorithm, ".
  939. " sourcename, sourceversion, sourceuri, timeexecuted) ".
  940. "VALUES ('%s','%s','%s','%s','%s','%s','%s','%s','%s')";
  941. $previous_db = tripal_db_set_active('chado'); // use chado database
  942. db_query($sql,$node->analysisname, $node->description,
  943. $node->program,$node->programversion,$node->algorithm,
  944. $node->sourcename, $node->sourceversion, $node->sourceuri,
  945. $timestamp);
  946. // find the newly entered analysis_id
  947. $sql = "SELECT analysis_id ".
  948. "FROM {analysis} ".
  949. "WHERE program='%s'".
  950. "AND programversion='%s'".
  951. "AND sourcename='%s'";
  952. $analysis_id = db_result(db_query($sql, $node->program,
  953. $node->programversion, $node->sourcename));
  954. // Get cvterm_id for 'analysis_blast_settings'
  955. $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
  956. "INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
  957. "WHERE CVT.name = 'analysis_blast_settings' ".
  958. "AND CV.name = 'tripal'";
  959. $type_id = db_result(db_query($sql));
  960. //---------------------------------------------------
  961. // Insert into chado {analysisprop} table
  962. //---------------------------------------------------
  963. $sql = "INSERT INTO {analysisprop} (analysis_id, type_id, value) ".
  964. "VALUES (%d, %d, '%s')";
  965. $blastsettings = $node->blastdb."|".$node->blastfile."|".$node->blastparameters;
  966. db_query($sql, $analysis_id, $type_id, $blastsettings);
  967. tripal_db_set_active($previous_db); // switch back to drupal database
  968. //---------------------------------------------------
  969. // Add a job if the user wants to parse the xml output
  970. //---------------------------------------------------
  971. if($node->blastjob) {
  972. $job_args[0] = $analysis_id;
  973. $job_args[1] = $node->blastdb;
  974. $job_args[2] = $node->blastfile;
  975. if (is_readable($node->blastfile)) {
  976. $fname = preg_replace("/.*\/(.*)/", "$1", $node->blastfile);
  977. tripal_add_job("Parse blast: $fname",'tripal_analysis_blast',
  978. 'tripal_analysis_blast_parseXMLFile', $job_args, $user->uid);
  979. } else {
  980. drupal_set_message("Can not open blast output file. Job not scheduled.");
  981. }
  982. }
  983. // next add the item to the drupal table
  984. $sql = "INSERT INTO {chado_analysis} (nid, vid, analysis_id) ".
  985. "VALUES (%d, %d, %d)";
  986. db_query($sql,$node->nid,$node->vid,$analysis_id);
  987. // Create a title for the analysis node using the unique keys so when the
  988. // node is saved, it will have a title
  989. $record = new stdClass();
  990. // If the analysis has a name, use it as the node title. If not, construct
  991. // the title using program, programversion, and sourcename
  992. if ($node->analysisname) {
  993. $record->title = $node->analysisname;
  994. } else {
  995. //Construct node title as "program (version)
  996. $record->title = "$node->program ($node->programversion)";
  997. }
  998. $record->nid = $node->nid;
  999. drupal_write_record('node',$record,'nid');
  1000. drupal_write_record('node_revisions',$record,'nid');
  1001. }
  1002. /*******************************************************************************
  1003. * Delete blast anlysis
  1004. */
  1005. function chado_analysis_blast_delete($node){
  1006. // Before removing, get analysis_id so we can remove it from chado database
  1007. // later
  1008. $sql_drupal = "SELECT analysis_id ".
  1009. "FROM {chado_analysis} ".
  1010. "WHERE nid = %d ".
  1011. "AND vid = %d";
  1012. $analysis_id = db_result(db_query($sql_drupal, $node->nid, $node->vid));
  1013. // Remove data from the {chado_analysis}, {node}, and {node_revisions} tables
  1014. $sql_del = "DELETE FROM {chado_analysis} ".
  1015. "WHERE nid = %d ".
  1016. "AND vid = %d";
  1017. db_query($sql_del, $node->nid, $node->vid);
  1018. $sql_del = "DELETE FROM {node} ".
  1019. "WHERE nid = %d ".
  1020. "AND vid = %d";
  1021. db_query($sql_del, $node->nid, $node->vid);
  1022. $sql_del = "DELETE FROM {node_revisions} ".
  1023. "WHERE nid = %d ".
  1024. "AND vid = %d";
  1025. db_query($sql_del, $node->nid, $node->vid);
  1026. //Remove from analysisfeatureprop, analysisfeature, analysis, and analysisprop tables
  1027. $previous_db = tripal_db_set_active('chado');
  1028. $sql = "SELECT analysisfeature_id FROM {analysisfeature} WHERE analysis_id=%d";
  1029. $results = db_query($sql, $analysis_id);
  1030. while ($af = db_fetch_object($results)) {
  1031. db_query("DELETE FROM {analysisfeatureprop} WHERE analysisfeature_id = %d", $af->analysisfeature_id);
  1032. }
  1033. db_query("DELETE FROM {analysisfeature} WHERE analysis_id = %d", $analysis_id);
  1034. db_query("DELETE FROM {analysisprop} WHERE analysis_id = %d", $analysis_id);
  1035. db_query("DELETE FROM {analysis} WHERE analysis_id = %d", $analysis_id);
  1036. tripal_db_set_active($previous_db);
  1037. }
  1038. /*******************************************************************************
  1039. * Update blast analysis
  1040. */
  1041. function chado_analysis_blast_update($node){
  1042. global $user;
  1043. if($node->revision){
  1044. // TODO -- decide what to do about revisions
  1045. } else {
  1046. // Create a timestamp so we can insert it into the chado database
  1047. $time = $node->timeexecuted;
  1048. $month = $time['month'];
  1049. $day = $time['day'];
  1050. $year = $time['year'];
  1051. $timestamp = $month.'/'.$day.'/'.$year;
  1052. // get the analysis_id for this node:
  1053. $sql = "SELECT analysis_id ".
  1054. "FROM {chado_analysis} ".
  1055. "WHERE vid = %d";
  1056. $analysis_id = db_fetch_object(db_query($sql, $node->vid))->analysis_id;
  1057. $sql = "UPDATE {analysis} ".
  1058. "SET name = '%s', ".
  1059. " description = '%s', ".
  1060. " program = '%s', ".
  1061. " programversion = '%s', ".
  1062. " algorithm = '%s', ".
  1063. " sourcename = '%s', ".
  1064. " sourceversion = '%s', ".
  1065. " sourceuri = '%s', ".
  1066. " timeexecuted = '%s' ".
  1067. "WHERE analysis_id = %d ";
  1068. $previous_db = tripal_db_set_active('chado'); // use chado database
  1069. db_query($sql, $node->analysisname, $node->description, $node->program,
  1070. $node->programversion,$node->algorithm,$node->sourcename,
  1071. $node->sourceversion, $node->sourceuri, $timestamp, $analysis_id);
  1072. // Get cvterm_id for 'analysis_blast_settings'
  1073. $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
  1074. "INNER JOIN cv CV ON CV.cv_id = CVT.cv_id ".
  1075. "WHERE CVT.name = 'analysis_blast_settings' ".
  1076. "AND CV.name = 'tripal'";
  1077. $type_id = db_result(db_query($sql));
  1078. $sql = "UPDATE {analysisprop} ".
  1079. "SET value = '%s' ".
  1080. "WHERE analysis_id = %d AND type_id = %d";
  1081. $blastsettings = $node->blastdb."|".$node->blastfile."|".$node->blastparameters;
  1082. db_query($sql, $blastsettings, $analysis_id, $type_id);
  1083. tripal_db_set_active($previous_db); // switch back to drupal database
  1084. // Add a job if the user wants to parse the xml output
  1085. if($node->blastjob) {
  1086. $job_args[0] = $analysis_id;
  1087. $job_args[1] = $node->blastdb;
  1088. $job_args[2] = $node->blastfile;
  1089. if (is_readable($node->blastfile)) {
  1090. $fname = preg_replace("/.*\/(.*)/", "$1", $node->blastfile);
  1091. tripal_add_job("Parse blast: $fname",'tripal_analysis_blast',
  1092. 'tripal_analysis_blast_parseXMLFile', $job_args, $user->uid);
  1093. } else {
  1094. drupal_set_message("Can not open blast output file. Job not scheduled.");
  1095. }
  1096. }
  1097. // Create a title for the analysis node using the unique keys so when the
  1098. // node is saved, it will have a title
  1099. $record = new stdClass();
  1100. // If the analysis has a name, use it as the node title. If not, construct
  1101. // the title using program, programversion, and sourcename
  1102. if ($node->analysisname) {
  1103. $record->title = $node->analysisname;
  1104. } else {
  1105. //Construct node title as "program (version)
  1106. $record->title = "$node->program ($node->programversion)";
  1107. }
  1108. $record->nid = $node->nid;
  1109. drupal_write_record('node',$record,'nid');
  1110. drupal_write_record('node_revisions',$record,'nid');
  1111. }
  1112. }
  1113. /*******************************************************************************
  1114. * When a node is requested by the user this function is called to allow us
  1115. * to add auxiliary data to the node object.
  1116. */
  1117. function chado_analysis_blast_load($node){
  1118. // get the analysis_id for this node:
  1119. $sql = "SELECT analysis_id FROM {chado_analysis} WHERE vid = %d";
  1120. $ana_node = db_fetch_object(db_query($sql, $node->vid));
  1121. $additions = new stdClass();
  1122. if ($ana_node) {
  1123. // get analysis information
  1124. $sql = "SELECT Analysis_id, name AS analysisname, description, program, ".
  1125. " programversion, algorithm, sourcename, sourceversion, ".
  1126. " sourceuri, timeexecuted ".
  1127. "FROM {Analysis} ".
  1128. "WHERE Analysis_id = $ana_node->analysis_id";
  1129. $previous_db = tripal_db_set_active('chado'); // use chado database
  1130. $additions = db_fetch_object(db_query($sql));
  1131. // get number of features assc with this analysis
  1132. $sql = "SELECT count(feature_id) as featurecount ".
  1133. "FROM {Analysisfeature} ".
  1134. "WHERE Analysis_id = %d";
  1135. $additions->featurecount = db_result(db_query($sql, $ana_node->analysis_id));
  1136. // get cvterm_id for 'analysis_blast_settings'
  1137. $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
  1138. "INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
  1139. "WHERE CVT.name = 'analysis_blast_settings' ".
  1140. "AND CV.name = 'tripal'";
  1141. $type_id = db_result(db_query($sql));
  1142. // get analysisprop information
  1143. $sql = "SELECT value FROM {analysisprop} ".
  1144. "WHERE analysis_id = %d ".
  1145. "AND type_id = %d";
  1146. $analysisprop = db_result(db_query($sql, $ana_node->analysis_id, $type_id));
  1147. $prop_values = explode ("|", $analysisprop, 1);
  1148. $additions->blastdb = $prop_values[0];
  1149. $additions->blastfile = $prop_values[1];
  1150. $additions->blastparameters = $prop_values[2];
  1151. tripal_db_set_active($previous_db); // now use drupal database
  1152. }
  1153. // If the analysis has a name, use it as the node title. If not, construct
  1154. // the title using program programversion, and sourcename
  1155. if ($additions->analysisname) {
  1156. $additions->title = $additions->analysisname;
  1157. } else {
  1158. // Construct node title as "program version (source)
  1159. $additions->title = "$additions->program ($additions->programversion)";
  1160. }
  1161. return $additions;
  1162. }
  1163. /*******************************************************************************
  1164. * This function customizes the view of the chado_analysis node. It allows
  1165. * us to generate the markup.
  1166. */
  1167. function chado_analysis_blast_view ($node, $teaser = FALSE, $page = FALSE) {
  1168. // use drupal's default node view:
  1169. //dprint_r($node);
  1170. if (!$teaser) {
  1171. $node = node_prepare($node, $teaser);
  1172. // When previewing a node submitting form, it shows 'Array' instead of
  1173. // correct date format. We need to format the date here
  1174. $time = $node->timeexecuted;
  1175. if(is_array($time)){
  1176. $month = $time['month'];
  1177. $day = $time['day'];
  1178. $year = $time['year'];
  1179. $timestamp = $year.'-'.$month.'-'.$day;
  1180. $node->timeexecuted = $timestamp;
  1181. }
  1182. // When viewing a node, we need to reformat the analysisprop since we
  1183. // separate each value with a bar |
  1184. if (preg_match("/.*\|.*\|.*/",$node->blastdb)) {
  1185. $prop_values = explode("|", $node->blastdb);
  1186. $node->blastdb = $prop_values[0];
  1187. $node->blastfile = $prop_values[1];
  1188. $node->blastparameters = $prop_values[2];
  1189. }
  1190. }
  1191. return $node;
  1192. }
  1193. /*******************************************************************************
  1194. * Set the permission types that the chado module uses. Essentially we
  1195. * want permissionis that protect creation, editing and deleting of chado
  1196. * data objects
  1197. */
  1198. function tripal_analysis_blast_perm(){
  1199. return array(
  1200. 'access chado_analysis_blast content',
  1201. 'create chado_analysis_blast content',
  1202. 'delete chado_analysis_blast content',
  1203. 'edit chado_analysis_blast content',
  1204. );
  1205. }
  1206. /*******************************************************************************
  1207. * The following function proves access control for users trying to
  1208. * perform actions on data managed by this module
  1209. */
  1210. function chado_analysis_blast_access($op, $node, $account){
  1211. if ($op == 'create') {
  1212. return user_access('create chado_analysis_blast content', $account);
  1213. }
  1214. if ($op == 'update') {
  1215. if (user_access('edit chado_analysis_blast content', $account)) {
  1216. return TRUE;
  1217. }
  1218. }
  1219. if ($op == 'delete') {
  1220. if (user_access('delete chado_analysis_blast content', $account)) {
  1221. return TRUE;
  1222. }
  1223. }
  1224. if ($op == 'view') {
  1225. if (user_access('access chado_analysis_blast content', $account)) {
  1226. return TRUE;
  1227. }
  1228. }
  1229. return FALSE;
  1230. }
  1231. /********************************************************************************
  1232. *
  1233. */
  1234. function tripal_analysis_blast_get_result_object($xml_string,$db,$max,$feature_id, $analysis) {
  1235. $blast_object = new stdClass();
  1236. // Get the parser using db_id
  1237. $sql = "SELECT * FROM {tripal_analysis_blast} WHERE db_id = %d";
  1238. $parser = db_fetch_object(db_query($sql, $db->db_id));
  1239. $db_name = $parser->displayname;
  1240. $is_genbank = $parser->genbank_style;
  1241. $regex_hit_id = $parser->regex_hit_id;
  1242. $regex_hit_def = $parser->regex_hit_def;
  1243. $regex_hit_accession = $parser->regex_hit_accession;
  1244. // set default if regular expressions have not been specified
  1245. if(!$regex_hit_id){
  1246. $regex_hit_id = '/^(.*?)\s.*$/';
  1247. } else {
  1248. $regex_hit_id = '/'.$regex_hit_id.'/';
  1249. }
  1250. if(!$regex_hit_def){
  1251. $regex_hit_def = '/^.*?\s(.*)$/';
  1252. } else {
  1253. $regex_hit_def = '/'.$regex_hit_def.'/';
  1254. }
  1255. if(!$regex_hit_accession){
  1256. $regex_hit_accession = '/^(.*?)\s.*$/';
  1257. } else {
  1258. $regex_hit_accession = '/'.$regex_hit_accession.'/';
  1259. }
  1260. // Get analysis information
  1261. $blast_object->analysis = $analysis;
  1262. $blast_object->db = $db;
  1263. if (!$db_name) {
  1264. $blast_object->title = $analysis->name;
  1265. } else {
  1266. $blast_object->title = $db_name;
  1267. }
  1268. // Find node id for the analysis
  1269. $ana_nid = db_result(db_query("SELECT nid FROM {chado_analysis} WHERE analysis_id = %d", $analysis->aid));
  1270. $blast_object->ana_nid = $ana_nid;
  1271. $blast_object->ana_time = $analysis->time;
  1272. $blast_object->ana_name = $analysis->name;
  1273. // Load the file. This XML file should be an extract
  1274. // of the original XML file with only a single iteration.
  1275. // An iteration is essentially all the hits for a single
  1276. // query sequence.
  1277. $xml_output = simplexml_load_string($xml_string);
  1278. $iteration = '';
  1279. // new XML file parser has added the feature name within <Iteration_query-def> tags.
  1280. if ($xml_output->getName() == 'Iteration') {
  1281. foreach ($xml_output->children() as $xml_tag) {
  1282. if ($xml_tag->getName() == 'Iteration_query-def') {
  1283. // Here we show the feature name again to check if we pull the correct data
  1284. $blast_object->xml_tag = $xml_tag;
  1285. } else if ($xml_tag->getName() == 'Iteration_hits') {
  1286. $iteration = $xml_tag;
  1287. }
  1288. }
  1289. // This is for the file parsed by the old parser
  1290. } else {
  1291. $iteration = $xml_output;
  1292. }
  1293. $number_hits = 0;
  1294. foreach($iteration->children() as $hits){
  1295. $number_hits ++;
  1296. }
  1297. // add the links for updating blast info using Ajax
  1298. $blast_object->max = $max;
  1299. $blast_object->number_hits = $number_hits;
  1300. $blast_object->feature_id = $feature_id;
  1301. $hits_array = array();
  1302. $hit_count = 0;
  1303. foreach($iteration->children() as $hits){
  1304. $hsp_array = array();
  1305. $counter = 0;
  1306. foreach($hits->children() as $hit){
  1307. $best_evalue = 0;
  1308. $best_identity = 0;
  1309. $best_len = 0;
  1310. $element_name = $hit->getName();
  1311. if($element_name == 'Hit_id'){
  1312. // if parsing "name, acc, desc" from three tags (1/3)
  1313. if ($is_genbank) {
  1314. $hit_name = $hit;
  1315. }
  1316. } else if($element_name == 'Hit_def'){
  1317. if($is_genbank){
  1318. $description = $hit;
  1319. } else {
  1320. $accession = preg_replace($regex_hit_accession,"$1",$hit);
  1321. $hit_name = preg_replace($regex_hit_id,"$1",$hit);
  1322. $description = preg_replace($regex_hit_def,"$1",$hit);
  1323. }
  1324. } else if($element_name == 'Hit_accession'){
  1325. // if parsing "name, acc, desc" from three tags (3/3)
  1326. if ($is_genbank){
  1327. $accession = $hit;
  1328. }
  1329. // now run through each HSP for this hit
  1330. } else if($element_name == 'Hit_hsps'){
  1331. foreach($hit->children() as $hsp){
  1332. foreach($hsp->children() as $hsp_info){
  1333. $element_name = $hsp_info->getName();
  1334. if($element_name == 'Hsp_num'){
  1335. $hsp_num = $hsp_info;
  1336. }
  1337. if($element_name == 'Hsp_bit-score'){
  1338. $hsp_bit_score = $hsp_info;
  1339. }
  1340. if($element_name == 'Hsp_score'){
  1341. $hsp_score = $hsp_info;
  1342. }
  1343. if($element_name == 'Hsp_evalue'){
  1344. $hsp_evalue = $hsp_info;
  1345. // use the first evalue for this set of HSPs
  1346. // as the best evalue. This gets shown as
  1347. // info for the overall match.
  1348. if(!$best_evalue){
  1349. $best_evalue = $hsp_evalue;
  1350. }
  1351. }
  1352. if($element_name == 'Hsp_query-from'){
  1353. $hsp_query_from = $hsp_info;
  1354. }
  1355. if($element_name == 'Hsp_query-to'){
  1356. $hsp_query_to = $hsp_info;
  1357. }
  1358. if($element_name == 'Hsp_hit-from'){
  1359. $hsp_hit_from = $hsp_info;
  1360. }
  1361. if($element_name == 'Hsp_hit-to'){
  1362. $hsp_hit_to = $hsp_info;
  1363. }
  1364. if($element_name == 'Hsp_query-frame'){
  1365. $hsp_query_frame = $hsp_info;
  1366. }
  1367. if($element_name == 'Hsp_identity'){
  1368. $hsp_identity = $hsp_info;
  1369. // use the first identity count for this set of HSPs
  1370. // as the best identity. It is the numerator of the
  1371. // percent identity reported for the overall match.
  1372. if(!$best_identity){
  1373. $best_identity = $hsp_identity;
  1374. }
  1375. }
  1376. if($element_name == 'Hsp_positive'){
  1377. $hsp_positive = $hsp_info;
  1378. }
  1379. if($element_name == 'Hsp_align-len'){
  1380. $hsp_align_len = $hsp_info;
  1381. // use the first alignment length for this set of HSPs
  1382. // as the best length. It is the denominator of the
  1383. // percent identity reported for the overall match.
  1384. if(!$best_len){
  1385. $best_len = $hsp_align_len;
  1386. }
  1387. }
  1388. if($element_name == 'Hsp_qseq'){
  1389. $hsp_qseq = $hsp_info;
  1390. }
  1391. if($element_name == 'Hsp_hseq'){
  1392. $hsp_hseq = $hsp_info;
  1393. }
  1394. if($element_name == 'Hsp_midline'){
  1395. $hsp_midline = $hsp_info;
  1396. }
  1397. }
  1398. $hsp_content = array();
  1399. $hsp_content['hsp_num'] = $hsp_num;
  1400. $hsp_content['bit_score'] = $hsp_bit_score;
  1401. $hsp_content['score'] = $hsp_score;
  1402. $hsp_content['evalue'] = $hsp_evalue;
  1403. $hsp_content['query_frame'] = $hsp_query_frame;
  1404. $hsp_content['qseq'] = $hsp_qseq;
  1405. $hsp_content['midline'] = $hsp_midline;
  1406. $hsp_content['hseq'] = $hsp_hseq;
  1407. $hsp_content['hit_from'] = $hsp_hit_from;
  1408. $hsp_content['hit_to'] = $hsp_hit_to;
  1409. $hsp_content['identity'] = $hsp_identity;
  1410. $hsp_content['align_len'] = $hsp_align_len;
  1411. $hsp_content['positive'] = $hsp_positive;
  1412. $hsp_content['query_from'] = $hsp_query_from;
  1413. $hsp_content['query_to'] = $hsp_query_to;
  1414. $hsp_array[$counter] = $hsp_content;
  1415. $counter ++;
  1416. }
  1417. }
  1418. }
  1419. $arrowr_url = url(drupal_get_path('theme', 'tripal')."/images/arrow_r.png");
  1420. $hits_array[$hit_count]['arrowr_url'] = $arrowr_url;
  1421. $hits_array[$hit_count]['accession'] = $accession;
  1422. $hits_array[$hit_count]['hit_name'] = $hit_name;
  1423. if($accession && $db->urlprefix){
  1424. $hits_array[$hit_count]['hit_url'] = "$db->urlprefix$accession";
  1425. } else {
  1426. // Test if this is another feature in the database
  1427. $sql = "SELECT feature_id FROM {feature} WHERE uniquename = '%s'";
  1428. $previous_db = db_set_active('chado');
  1429. $hit_feature_id = db_result(db_query($sql, $hit_name));
  1430. db_set_active($previous_db);
  1431. // If it is, add link to that feature
  1432. if ($hit_feature_id) {
  1433. $hits_array[$hit_count]['hit_url'] = "ID$hit_feature_id";
  1434. }
  1435. }
  1436. $hits_array[$hit_count]['best_evalue'] = $best_evalue;
  1437. $percent_identity = number_format($best_identity/$best_len*100, 2);
  1438. $hits_array[$hit_count]['percent_identity'] = $percent_identity;
  1439. $hits_array[$hit_count]['description'] = $description;
  1440. $hits_array[$hit_count]['hsp'] = $hsp_array;
  1441. $hit_count ++;
  1442. // if we've hit the maximum number of hits then return
  1443. if($max > 0 && $hit_count >= $max){
  1444. break;
  1445. }
  1446. }
  1447. $blast_object->hits_array = $hits_array;
  1448. return $blast_object;
  1449. }