tripal_analysis_blast.module 58 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509
  1. <?php
  2. /*******************************************************************************
  3. * Tripal Blast Result lets users show/hide blast results associated
  4. * with a tripal feature
  5. ******************************************************************************/
  /*******************************************************************************
   * tripal_analysis_blast_init()
   * HOOK: Implementation of hook_init()
   * Queues this module's stylesheet and javascript file. Both are looked up
   * beneath the directory of the 'tripal' theme.
   */
  function tripal_analysis_blast_init() {
    // Resolve the theme directory once and reuse it for both assets.
    $tripal_theme_dir = drupal_get_path('theme', 'tripal');

    drupal_add_css($tripal_theme_dir . '/css/tripal_analysis_blast.css');
    drupal_add_js($tripal_theme_dir . '/js/tripal_analysis_blast.js');
  }
  /*******************************************************************************
   * tripal_analysis_blast_menu()
   * HOOK: Implementation of hook_menu()
   * Entry points and paths of the module
   *
   * Two callbacks are registered:
   *  - 'tripal_top_blast': AJAX endpoint. The three path components that follow
   *    'tripal_top_blast' are passed to tripal_get_feature_blast_results_ajax()
   *    as $feature_id, $db_id and $max.
   *  - 'admin/tripal/tripal_analysis/tripal_blast_regex/%': passes the wildcard
   *    (path component 4) to tripal_get_blast_regex().
   *
   * Titles are plain strings: the Drupal 6 menu system translates titles
   * itself, so wrapping them in t() here would translate them twice. The
   * Drupal 5 style 'path' key is not a Drupal 6 router property and is not
   * emitted; the array key is the path.
   *
   * @return
   *   Array of menu router items keyed by path.
   */
  function tripal_analysis_blast_menu() {
    $items = array();

    // Show top 10/25/all blast results for ajax calls
    $items['tripal_top_blast'] = array(
      'title' => 'Blast Hits',
      'page callback' => 'tripal_get_feature_blast_results_ajax',
      'page arguments' => array(1, 2, 3),
      'access arguments' => array('access content'),
      'type' => MENU_CALLBACK,
    );

    // Show regular expressions for selected database in Blast admin page
    $items['admin/tripal/tripal_analysis/tripal_blast_regex/%'] = array(
      'title' => 'Blast Regex',
      'page callback' => 'tripal_get_blast_regex',
      'page arguments' => array(4),
      'access arguments' => array('administer site configuration'),
      'type' => MENU_CALLBACK,
    );

    return $items;
  }
  /*******************************************************************************
   * tripal_analysis_blast_nodeapi()
   * HOOK: Implementation of hook_nodeapi()
   * Attaches blast output to the content of nodes whose type is enabled in the
   * 'tripal_analysis_blast_setting' variable (default: 'chado_feature').
   *
   * Only the 'view' operation is handled, and only for full (non-teaser) views
   * of nodes that carry a feature id. When the node is being built for the
   * search index the index-oriented theme output is attached; otherwise the
   * regular blast result theme is attached for 'chado_feature' nodes.
   */
  function tripal_analysis_blast_nodeapi(&$node, $op, $teaser, $page) {
    if ($op != 'view') {
      return;
    }

    // Node types for which blast results are switched on.
    $enabled_types = variable_get('tripal_analysis_blast_setting', array('chado_feature'));
    if (!in_array($node->type, $enabled_types, TRUE)) {
      return;
    }

    // Teasers and nodes without a feature id get nothing added.
    if ($teaser || !$node->feature->feature_id) {
      return;
    }

    if ($node->build_mode == NODE_BUILD_SEARCH_INDEX) {
      // Search indexing: attach the plain listing meant for the indexer.
      $index_markup = theme('tripal_analysis_blast_results_index_version', $node);
      $node->content['tripal_analysis_blast_index_version'] = array(
        '#value' => $index_markup,
        '#weight' => 8,
      );
      return;
    }

    if (strcmp('chado_feature', $node->type) == 0) {
      // Regular page view of a feature: attach the themed blast results.
      $result_markup = theme('tripal_feature_blast_results', $node);
      $node->content['tripal_feature_blast_results'] = array(
        '#value' => $result_markup,
        '#weight' => 8,
      );
    }
  }
  /************************************************************************
   * HOOK: Implementation of hook_theme()
   * We need to let drupal know about our theme functions and their arguments.
   * We create theme functions to allow users of the module to customize the
   * look and feel of the output generated in this module
   *
   * Both hooks take a single 'node' argument. Arguments are declared as
   * name => default pairs; a bare array('node') would register an argument
   * with key 0 and default value 'node' instead of an argument named 'node'.
   *
   * @return
   *   Theme registry entries keyed by theme hook name.
   */
  function tripal_analysis_blast_theme() {
    return array(
      // Rendered by theme_tripal_analysis_blast_results_index_version().
      'tripal_analysis_blast_results_index_version' => array(
        'arguments' => array('node' => NULL),
      ),
      // Rendered through the tripal_feature_blast_results template.
      'tripal_feature_blast_results' => array(
        'arguments' => array('node' => NULL),
        'template' => 'tripal_feature_blast_results',
      ),
    );
  }
  /*******************************************************************************
   * tripal_get_feature_blast_results_ajax()
   * Menu callback: re-renders the blast results of one feature and prints them
   * as JSON of the form {"update": "<markup>"}.
   *
   * @param $feature_id
   *   Feature id used to look up the node id in {chado_feature}.
   * @param $db_id
   *   Database id copied onto the node as $node->db_id for the theme layer.
   * @param $max
   *   Value copied onto the node as $node->max for the theme layer.
   *
   * When no {chado_feature} row exists for the feature, or the node cannot be
   * loaded, an empty 'update' string is returned instead of theming a
   * non-existent node.
   *
   * NOTE(review): the node is themed without a node_access('view') check; the
   * only gate visible here is the 'access content' permission on the menu
   * item. Confirm that this is intended.
   */
  function tripal_get_feature_blast_results_ajax($feature_id, $db_id, $max) {
    $sql = "SELECT nid FROM {chado_feature} WHERE feature_id = %d";
    $record = db_fetch_object(db_query($sql, $feature_id));

    // db_fetch_object() yields FALSE when the feature has no node record.
    $node = $record ? node_load($record->nid) : FALSE;
    if (!$node) {
      drupal_json(array('update' => ''));
      return;
    }

    // add the additional variables that the theme needs to generate the output
    $node->db_id = $db_id;
    $node->max = $max;

    // call the theme to rebuild the blast results
    drupal_json(array('update' => theme('tripal_feature_blast_results', $node)));
  }
  /*******************************************************************************
   * tripal_analysis_blast_preprocess_tripal_feature_blast_results()
   * Preprocess function for the tripal_feature_blast_results template.
   * Stores the feature's blast results in
   * $variables['tripal_analysis_blast']['blast_results_list'].
   *
   * The node may carry two optional properties:
   *  - db_id: restricts the results to one database; passed on as NULL when it
   *    is not set.
   *  - max:   passed on as the $max argument; defaults to 10 when not set.
   *
   * @param $variables
   *   Template variables (by reference); $variables['node']->feature must hold
   *   the feature whose feature_id is used for the lookup.
   */
  function tripal_analysis_blast_preprocess_tripal_feature_blast_results(&$variables) {
    $node = $variables['node'];
    $feature = $node->feature;

    // Both properties are optional, so test for them before reading to avoid
    // undefined-property notices on nodes that do not carry them.
    $db_id = isset($node->db_id) ? $node->db_id : NULL;
    $max = isset($node->max) ? $node->max : 10;

    $blast_results = tripal_get_feature_blast_results($feature->feature_id, $db_id, $max);
    $variables['tripal_analysis_blast']['blast_results_list'] = $blast_results;
  }
  /*******************************************************************************
   * theme_tripal_analysis_blast_results_index_version()
   * Theme function producing the blast content used when a node is built for
   * the search index.
   *
   * @param $node
   *   Node whose ->feature->feature_id identifies the feature.
   * @return
   *   Whatever tripal_get_blast_results_index_version() returns for that
   *   feature id.
   */
  function theme_tripal_analysis_blast_results_index_version($node) {
    return tripal_get_blast_results_index_version($node->feature->feature_id);
  }
  /*******************************************************************************
   * tripal_get_feature_blast_results()
   * Get blast result from the analysisfeatureprop table for the feature
   *
   * The blast XML is read from analysisfeatureprop rows typed with the cvterm
   * 'analysis_blast_output_iteration_hits' of the 'tripal' vocabulary. Each row
   * is paired with an analysisprop value holding bar-separated blast settings
   * whose first field is the database id; rows whose settings do not consist of
   * exactly three fields, or whose first field is empty, are skipped.
   *
   * @param $feature_id
   *   Feature whose blast results are wanted.
   * @param $db_id
   *   When non-empty, only results whose settings name this database id are
   *   returned; when empty, results for every database are returned.
   * @param $max
   *   Passed through unchanged to tripal_analysis_blast_get_result_object().
   * @return
   *   Zero-indexed array of the objects built by
   *   tripal_analysis_blast_get_result_object(), each with an added ->analysis
   *   property (analysis id, name and formatted execution date).
   */
  function tripal_get_feature_blast_results($feature_id, $db_id, $max) {
    // Get the blast results stored as XML from the analysisfeatureprop table
    // the type for the property is named 'analysis_blast_output_iteration_hits'
    // and is found in the 'tripal' controlled vocabulary.
    $sql = "SELECT AP.value AS apvalue, AFP.value AS afpvalue, AF.analysis_id AS aid
            FROM {analysisfeatureprop} AFP
            INNER JOIN {analysisfeature} AF ON AF.analysisfeature_id = AFP.analysisfeature_id
            INNER JOIN {analysisprop} AP ON AP.analysis_id = AF.analysis_id
            INNER JOIN {cvterm} CVT on AFP.type_id = CVT.cvterm_id
            INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
            WHERE AF.feature_id = %d AND CV.name = '%s' AND
            CVT.name = '%s' AND AP.value like '%|%' ";
    $previous_db = tripal_db_set_active('chado');
    $result = db_query($sql, $feature_id, 'tripal', 'analysis_blast_output_iteration_hits');
    tripal_db_set_active($previous_db);

    $blast_obj_array = array();
    while ($analysisfeatureprop = db_fetch_object($result)) {
      // The database id is the first of three bar-separated settings fields.
      $blastsettings = explode("|", $analysisfeatureprop->apvalue);
      if (count($blastsettings) != 3 || !$blastsettings[0]) {
        continue;
      }
      $att_db_id = $blastsettings[0];

      // Apply the database filter before doing any further lookups, so rows
      // that will be discarded do not cost two extra queries each.
      if ($db_id && $att_db_id != $db_id) {
        continue;
      }

      $previous_db = tripal_db_set_active('chado');
      // get analysis name and date
      $sql = "SELECT analysis_id AS aid, name, to_char(timeexecuted, 'MM-DD-YYYY') AS time
              FROM {analysis}
              WHERE analysis_id = %d";
      $analysis = db_fetch_object(db_query($sql, $analysisfeatureprop->aid));
      // Get db object using the db_id
      $sql = "SELECT * FROM {db} WHERE db_id=%d";
      $db = db_fetch_object(db_query($sql, $att_db_id));
      tripal_db_set_active($previous_db);

      $blast_obj = tripal_analysis_blast_get_result_object($analysisfeatureprop->afpvalue, $db, $max, $feature_id, $analysis);
      $blast_obj->analysis = $analysis;
      $blast_obj_array[] = $blast_obj;
    }
    return $blast_obj_array;
  }
  /*******************************************************************************
   * tripal_get_blast_results_index_version()
   * Collect the stored blast results of a feature and prepare content for
   * search indexing.
   *
   * For every analysisfeatureprop row of the feature typed with the
   * 'analysis_blast_output_iteration_hits' cvterm ('tripal' vocabulary), the
   * stored XML is handed to parse_NCBI_Blast_XML_index_version() together with
   * the db record named by the first bar-separated field of the paired
   * analysisprop value. The parser outputs are concatenated.
   *
   * @param $feature_id
   *   Feature whose blast results should be indexed.
   * @return
   *   Concatenated parser output; an empty string when the feature has no
   *   stored blast results.
   *
   * NOTE(review): unlike tripal_get_feature_blast_results(), analysisprop
   * values that are not three-field blast settings are not skipped here.
   * Confirm whether that difference is intended.
   */
  function tripal_get_blast_results_index_version($feature_id) {
    // Start from an empty string so a feature without results yields '' rather
    // than an undefined variable.
    $content = '';

    // Get cvterm_id for 'analysis_blast_output_iteration_hits', which types the
    // analysisfeatureprop rows holding the blast XML
    $previous_db = tripal_db_set_active('chado');
    $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
           "INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
           "WHERE CVT.name = 'analysis_blast_output_iteration_hits' ".
           "AND CV.name = 'tripal'";
    $type_id = db_result(db_query($sql));

    // Get xml string from analysisfeatureprop value column, get db_id from
    // analysisprop value column, and get analysis_id from analysisfeature table
    $sql = "SELECT AP.value AS apvalue, AFP.value AS afpvalue, AF.analysis_id AS aid
            FROM {analysisfeatureprop} AFP
            INNER JOIN analysisfeature AF ON AF.analysisfeature_id = AFP.analysisfeature_id
            INNER JOIN analysisprop AP ON AP.analysis_id = AF.analysis_id
            WHERE feature_id = %d
            AND AFP.type_id = %d ";
    $result = db_query($sql, $feature_id, $type_id);
    tripal_db_set_active($previous_db);

    while ($analysisfeatureprop = db_fetch_object($result)) {
      // The database id is the first bar-separated field of the setting value.
      $blastsettings = explode("|", $analysisfeatureprop->apvalue);
      $att_db_id = $blastsettings[0];

      // Get db object using the db_id
      $previous_db = tripal_db_set_active('chado');
      $sql = "SELECT * FROM {db} WHERE db_id=%d";
      $db = db_fetch_object(db_query($sql, $att_db_id));
      tripal_db_set_active($previous_db);

      // The parser takes exactly (xml, db, feature_id). No analysis record is
      // fetched because the parser has no parameter that would receive it.
      $content .= parse_NCBI_Blast_XML_index_version($analysisfeatureprop->afpvalue, $db, $feature_id);
    }
    return $content;
  }
  /*******************************************************************************
   * parse_NCBI_Blast_XML_index_version()
   * Parse NCBI Blast results for indexing so that user can use blast results to
   * find corresponding features
   *
   * The parser settings (display name, genbank flag and three regular
   * expressions) are read from {tripal_analysis_blast} for the given database.
   * With the genbank flag set, hit name, description and accession are taken
   * from <Hit_id>, <Hit_def> and <Hit_accession>. Otherwise all three are cut
   * out of <Hit_def> with the configured expressions, falling back to "first
   * word" / "rest of line" defaults when an expression is not configured.
   *
   * @param $xml_string
   *   XML of one iteration: either an <Iteration> element containing
   *   <Iteration_query-def> and <Iteration_hits>, or (older format) the hits
   *   element itself as the root.
   * @param $db
   *   Database record; its db_id selects the parser settings. A missing record
   *   is treated as db_id 0.
   * @param $feature_id
   *   Not used by this function.
   * @return
   *   Markup: a heading with the database display name, an optional "Query:"
   *   line, and one paragraph per hit. When the XML cannot be parsed or has no
   *   hits element, only the part built so far is returned.
   */
  function parse_NCBI_Blast_XML_index_version($xml_string, $db, $feature_id) {
    $html_out = '';

    // Get the parser using db_id
    $db_id = (is_object($db) && isset($db->db_id)) ? $db->db_id : 0;
    $sql = "SELECT * FROM {tripal_analysis_blast} WHERE db_id = %d";
    $parser = db_fetch_object(db_query($sql, $db_id));

    // No settings row means: no title, not genbank style, default expressions.
    $db_name = '';
    $is_genbank = FALSE;
    $regex_hit_id = '';
    $regex_hit_def = '';
    $regex_hit_accession = '';
    if ($parser) {
      $db_name = $parser->displayname;
      $is_genbank = $parser->genbank_style;
      $regex_hit_id = $parser->regex_hit_id;
      $regex_hit_def = $parser->regex_hit_def;
      $regex_hit_accession = $parser->regex_hit_accession;
    }

    // set default if regular expressions have not been specified; stored
    // expressions are kept without delimiters, so add them here
    $regex_hit_id = $regex_hit_id ? '/' . $regex_hit_id . '/' : '/^(.*?)\s.*$/';
    $regex_hit_def = $regex_hit_def ? '/' . $regex_hit_def . '/' : '/^.*?\s(.*)$/';
    $regex_hit_accession = $regex_hit_accession ? '/' . $regex_hit_accession . '/' : '/^(.*?)\s.*$/';

    $html_out .= "<h3>$db_name</h3>";

    // Load the XML. It should be an extract of the original blast output
    // holding a single iteration, i.e. all the hits for one query sequence.
    $xml_output = simplexml_load_string($xml_string);
    if ($xml_output === FALSE) {
      // Unparsable XML: nothing more can be indexed.
      return $html_out;
    }

    $iteration = NULL;
    if ($xml_output->getName() == 'Iteration') {
      // New format: the feature name is stored in <Iteration_query-def>.
      foreach ($xml_output->children() as $xml_tag) {
        $tag_name = $xml_tag->getName();
        if ($tag_name == 'Iteration_query-def') {
          // Show the query name so it can be checked against the feature
          $html_out .= "Query: $xml_tag<br>";
        }
        else if ($tag_name == 'Iteration_hits') {
          $iteration = $xml_tag;
        }
      }
    }
    else {
      // Old format: the root element is the list of hits itself.
      $iteration = $xml_output;
    }
    if ($iteration === NULL) {
      // An <Iteration> without <Iteration_hits> has nothing to list.
      return $html_out;
    }

    // now run through the blast hits of this iteration
    foreach ($iteration->children() as $hit) {
      // Reset per hit so a hit lacking a tag does not inherit the previous
      // hit's value.
      $hit_name = '';
      $accession = '';
      $description = '';

      foreach ($hit->children() as $hit_tag) {
        $text = (string) $hit_tag;
        switch ($hit_tag->getName()) {
          case 'Hit_id':
            if ($is_genbank) {
              $hit_name = $text;
            }
            break;

          case 'Hit_def':
            if ($is_genbank) {
              $description = $text;
            }
            else {
              $accession = preg_replace($regex_hit_accession, '$1', $text);
              $hit_name = preg_replace($regex_hit_id, '$1', $text);
              $description = preg_replace($regex_hit_def, '$1', $text);
            }
            break;

          case 'Hit_accession':
            if ($is_genbank) {
              $accession = $text;
            }
            break;
        }
      }

      $html_out .= "<p>$hit_name<br>";
      $html_out .= "$accession<br>";
      $html_out .= "<b>$description</b><br>";
    }
    return $html_out;
  }
  /*******************************************************************************
   * tripal_analysis_blast_get_settings()
   * Tripal Blast administrative setting form. This function is called by
   * tripal_analysis module which asks for an admin form to show on the page
   *
   * The form offers a checkbox enabling blast results on feature pages and a
   * 'Blast Parser Settings' fieldset in which a display title, the genbank
   * style flag and three regular expressions can be entered per database. The
   * database choices are read from the chado db table.
   *
   * @return
   *   Object with two properties: ->form (the form array) and ->title
   *   ("Tripal Blast").
   */
  function tripal_analysis_blast_get_settings() {
    // 'chado_feature' is the only content type offered for showing blast results
    $allowedoptions = array(
      'chado_feature' => "Show blast results on feature pages",
    );

    $form = array();
    $form['description'] = array(
      '#type' => 'item',
      '#value' => t("Most chado features were analyzed by blast against major sequence databases. This option allows user to display the blast analysis results. Please read user manual for storage and display of blast files. Check the box to enable the analysis results. Uncheck to disable it."),
      '#weight' => 0,
    );
    $form['tripal_analysis_blast_setting'] = array(
      '#type' => 'checkboxes',
      '#options' => $allowedoptions,
      '#default_value' => variable_get('tripal_analysis_blast_setting',
                                       array('chado_feature')),
    );
    $form['blast_parser'] = array(
      '#title' => t('Blast Parser Settings'),
      '#type' => 'fieldset',
      '#description' => t('Configure parsers for showing blast results. Each database is '.
                          'allowed to have one xml parser.'),
      '#weight' => 10
    );

    // get a list of db from chado for user to choose. The chado connection is
    // active only for this lookup, so that nothing else (for example t())
    // runs while it is the active database.
    $previous_db = tripal_db_set_active('chado');
    $sql = 'SELECT db_id, name FROM {db} ORDER BY lower(name)';
    $results = db_query($sql);
    $blastdbs = array();
    while ($db = db_fetch_object($results)) {
      $blastdbs[$db->db_id] = $db->name;
    }
    tripal_db_set_active($previous_db);

    $form['db_options'] = array(
      '#type' => 'value',
      '#value' => $blastdbs
    );
    $form['blast_parser']['blastdb'] = array(
      '#title' => t('Database'),
      '#type' => 'select',
      '#description' => t('The database used for the blast analysis.'),
      '#options' => $form['db_options']['#value'],
      '#attributes' => array(
        'onChange' => "return tripal_update_regex(this)",
      )
    );
    $form['blast_parser']['displayname'] = array(
      '#title' => t('Title for the blast analysis'),
      '#type' => 'textfield',
    );
    $form['blast_parser']['gb_style_parser'] = array(
      '#title' => t('Use Genebank style parser. This will clear all regular expression settings for the selected database.'),
      '#type' => 'checkbox',
      '#attributes' => array(
        'onClick' => "return tripal_set_genbank_style(this)",
      )
    );
    $form['blast_parser']['hit_id'] = array(
      '#title' => t('Regular expression for Hit Name'),
      '#type' => 'textfield',
    );
    $form['blast_parser']['hit_def'] = array(
      '#title' => t('Regular expression for Hit Description'),
      '#type' => 'textfield',
    );
    $form['blast_parser']['hit_accession'] = array(
      '#title' => t('Regular expression for Hit Accession'),
      '#type' => 'textfield',
    );
    $form['blast_parser']['button'] = array(
      '#type' => 'submit',
      '#value' => t('Save settings')
    );

    // Create the container explicitly; assigning a property to an undefined
    // variable is an error on current PHP versions.
    $settings = new stdClass();
    $settings->form = $form;
    $settings->title = "Tripal Blast";
    return $settings;
  }
  389. /*******************************************************************************
  390. * Parse Blast XML Output file into analysisfeatureprop table
  391. */
  392. function tripal_analysis_blast_parseXMLFile ($analysis_id, $blastdb, $blastfile, $job_id) {
  393. // Prepare log
  394. $filename = preg_replace("/.*\/(.*)/", "$1", $blastfile);
  395. $logfile = file_directory_path() . "/tripal/tripal_analysis_blast/load_$filename.log";
  396. $log = fopen($logfile, 'a'); // append parsing results to log file
  397. // If user input a file (e.g. blast.xml)
  398. if (is_file($blastfile)) {
  399. // Parsing started
  400. print "Parsing File:".$blastfile." ...\n";
  401. fwrite($log, date("D M j G:i:s Y").". Loading $blastfile\n");
  402. // Get cvterm_id for 'analysis_blast_output_iteration_hits' which is required
  403. // for inserting into the analysisfeatureprop table
  404. $previous_db = tripal_db_set_active('chado'); // use chado database
  405. $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
  406. "INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
  407. "WHERE CVT.name = 'analysis_blast_output_iteration_hits' ".
  408. "AND CV.name = 'tripal'";
  409. $type_id = db_result(db_query($sql));
  410. // Load the XML file.
  411. $blastoutput = simplexml_load_file($blastfile);
  412. $no_iterations = 0;
  413. foreach($blastoutput->children() as $tmp) {
  414. if ($tmp->getName() == 'BlastOutput_iterations') {
  415. foreach($tmp->children() as $itr) {
  416. if ($itr->getName() == 'Iteration') {
  417. $no_iterations ++;
  418. }
  419. }
  420. }
  421. }
  422. print "$no_iterations iterations to be processed.\n";
  423. $interval = intval($no_iterations * 0.01);
  424. $idx_iterations = 0;
  425. foreach ($blastoutput->children() as $blastoutput_tags) {
  426. if ($blastoutput_tags->getName() == 'BlastOutput_iterations') {
  427. foreach($blastoutput_tags->children() as $iterations) {
  428. if ($iterations->getName() == 'Iteration') {
  429. // Set job status
  430. $idx_iterations ++;
  431. if ($idx_iterations % $interval == 0) {
  432. $percentage = (int) ($idx_iterations / $no_iterations * 100);
  433. tripal_db_set_active($previous_db);
  434. tripal_job_set_progress($job_id, $percentage);
  435. $previous_db = tripal_db_set_active('chado');
  436. print $percentage."% ";
  437. }
  438. // now run through the blast hits/hsps of this iteration
  439. // and generate the rows of the table
  440. $feature_id = 0;
  441. foreach($iterations->children() as $iteration_tags) {
  442. // Match chado feature uniquename with <Iteration_query-def>
  443. // and get the feature_id
  444. $featurenaem_xml = '';
  445. if($iteration_tags->getName() == 'Iteration_query-def'){
  446. // If the Iteration_query-def in the format of "feature_id|uniquename"
  447. // get feature_id from it directly
  448. if (preg_match("/^(\d+)\|.+/", $iteration_tags, $matches)) {
  449. $feature_id = $matches[1];
  450. // If not in above format, try to match <Iteration_query-def> to feature's uniquename
  451. } else {
  452. // treat the first word of <Iteration_query-def> as uniquename
  453. $first_word = $iteration_tags;
  454. if (preg_match('/^(.*?)\s.*$/', $iteration_tags, $matches)) {
  455. $first_word = $matches[1];
  456. }
  457. // Find out how many features match this uniquename
  458. $sql = "SELECT count(feature_id) FROM {feature} ".
  459. "WHERE uniquename = '%s' ";
  460. $no_features = db_result(db_query($sql, $first_word));
  461. // If there is only one match, get the feature_id
  462. if ($no_features == 1) {
  463. $sql = "SELECT feature_id FROM {feature} ".
  464. "WHERE uniquename = '%s' ";
  465. $feature_id = db_result(db_query($sql, $first_word));
  466. // If the uniquename matches more than one features then skip and print 'Ambiguous'
  467. } else if ($no_features > 1) {
  468. fwrite($log, "Ambiguous: ".$first_word." matches more than one feature and is not processed.\n");
  469. continue;
  470. // If the uniquename did not match, skip and print 'Failed'
  471. } else {
  472. fwrite($log, "Failed: ".$first_word."\n");
  473. }
  474. }
  475. // Successfully matched. print 'Succeeded'
  476. if ($feature_id) {
  477. fwrite($log, "Succeeded: ".$first_word." => feature id:".$feature_id);
  478. $featurename_xml = $iteration_tags->asXML();
  479. }
  480. // Insert Iteration_hits into analysisfeatureprop and analysisfeature tables
  481. } else if($iteration_tags->getName() == 'Iteration_hits'){
  482. if ($feature_id) {
  483. // Make sure this iteration doesn't exist in analysisfeatureprop. If it does, update but not insert
  484. $sql = "SELECT analysisfeatureprop_id FROM {analysisfeatureprop} AFP ".
  485. "INNER JOIN analysisfeature AF ON AF.analysisfeature_id = AFP.analysisfeature_id ".
  486. "WHERE feature_id=%d ".
  487. "AND analysis_id=%d ".
  488. "AND type_id=%d ";
  489. $result = db_query($sql, $feature_id, $analysis_id, $type_id);
  490. $analysisfeatureprop = db_fetch_object($result);
  491. $xml_content = "<Iteration>\n".$featurename_xml."\n".$iteration_tags->asXML()."\n</Iteration>";
  492. // If this Iteration_hits already exists, update it
  493. if ($analysisfeatureprop) {
  494. $sql = "UPDATE {analysisfeatureprop} ".
  495. "SET value = '%s' ".
  496. "WHERE analysisfeatureprop_id = %d ";
  497. db_query($sql, $xml_content, $analysisfeatureprop->analysisfeatureprop_id);
  498. fwrite($log, " (Update)\n"); // write to log
  499. // Otherwise, insert the Iteration_hits into analysisfeature and analysisfeatureprop tables
  500. } else {
  501. //------------------------------------------------------
  502. // Insert into analysisfeature table
  503. //------------------------------------------------------
  504. $sql = "INSERT INTO {analysisfeature} (feature_id, analysis_id) ".
  505. "VALUES (%d, %d)";
  506. db_query ($sql, $feature_id, $analysis_id);
  507. // Get the newly inserted analysisfeature_id
  508. $sql = "SELECT analysisfeature_id FROM {analysisfeature} WHERE feature_id = %d AND analysis_id = %d";
  509. $analysisfeature_id = db_result(db_query($sql, $feature_id, $analysis_id));
  510. //------------------------------------------------------
  511. // Insert into analysisfeatureprop table
  512. //------------------------------------------------------
  513. $sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank)".
  514. "VALUES (%d, %d, '%s', %d)";
  515. db_query($sql, $analysisfeature_id, $type_id, $xml_content, '0');
  516. fwrite($log, " (Insert)\n"); // write to log
  517. }
  518. }
  519. }
  520. }
  521. }
  522. }
  523. }
  524. }
  525. tripal_db_set_active ($previous_db); // Use drupal database
  526. // Otherwise, $blastfile is a directory. Iterate through all xml files in it
  527. } else {
  528. $dir_handle = @opendir($blastfile) or die("Unable to open $blastfile");
  529. $pattern = sql_regcase($blastfile . "/*.XML");
  530. $total_files = count(glob($pattern));
  531. print "$total_files file(s) to be parsed.\n";
  532. $interval = intval($total_files * 0.01);
  533. $no_file = 0;
  534. // Get cvterm_id for 'analysis_blast_output_iteration_hits' which is required
  535. // for inserting into the analysisfeatureprop table
  536. $previous_db = tripal_db_set_active('chado'); // use chado database
  537. $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
  538. "INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
  539. "WHERE CVT.name = 'analysis_blast_output_iteration_hits' ".
  540. "AND CV.name = 'tripal'";
  541. $type_id = db_result(db_query($sql));
  542. // Parsing all files in the directory
  543. while ($file = readdir($dir_handle)) {
  544. if(preg_match("/^.*\.XML/i",$file)){
  545. // Set job status
  546. if ($no_file % $interval == 0) {
  547. $percentage = (int) ($no_file / $total_files * 100);
  548. tripal_db_set_active($previous_db);
  549. tripal_job_set_progress($job_id, $percentage);
  550. $previous_db = tripal_db_set_active('chado');
  551. print $percentage."% ";
  552. }
  553. // Parsing started
  554. print "Parsing File:".$file.". ";
  555. fwrite($log, date("D M j G:i:s Y").". Loading $file\n");
  556. // Load the XML file.
  557. $blastoutput = simplexml_load_file($blastfile."/".$file);
  558. $no_iterations = 0;
  559. foreach($blastoutput->children() as $tmp) {
  560. if ($tmp->getName() == 'BlastOutput_iterations') {
  561. foreach($tmp->children() as $itr) {
  562. if ($itr->getName() == 'Iteration') {
  563. $no_iterations ++;
  564. }
  565. }
  566. }
  567. }
  568. print "$no_iterations iterations to be processed.\n";
  569. foreach ($blastoutput->children() as $blastoutput_tags) {
  570. if ($blastoutput_tags->getName() == 'BlastOutput_iterations') {
  571. foreach($blastoutput_tags->children() as $iterations) {
  572. if ($iterations->getName() == 'Iteration') {
  573. // now run through the blast hits/hsps of this iteration
  574. // and generate the rows of the table
  575. $feature_id = 0;
  576. foreach($iterations->children() as $iteration_tags) {
  577. // Match chado feature uniquename with <Iteration_query-def>
  578. // and get the feature_id
  579. $featurenaem_xml = '';
  580. if($iteration_tags->getName() == 'Iteration_query-def'){
  581. // If the Iteration_query-def in the format of "feature_id|uniquename"
  582. // get feature_id from it directly
  583. if (preg_match("/^(\d+)\|.+/", $iteration_tags, $matches)) {
  584. $feature_id = $matches[1];
  585. // If not in above format, try to match <Iteration_query-def> to feature's uniquename
  586. } else {
  587. // treat the first word of <Iteration_query-def> as uniquename
  588. $first_word = $iteration_tags;
  589. if (preg_match('/^(.*?)\s.*$/', $iteration_tags, $matches)) {
  590. $first_word = $matches[1];
  591. }
  592. // Find out how many features match this uniquename
  593. $sql = "SELECT count(feature_id) FROM {feature} ".
  594. "WHERE uniquename = '%s' ";
  595. $no_features = db_result(db_query($sql, $first_word));
  596. // If there is only one match, get the feature_id
  597. if ($no_features == 1) {
  598. $sql = "SELECT feature_id FROM {feature} ".
  599. "WHERE uniquename = '%s' ";
  600. $feature_id = db_result(db_query($sql, $first_word));
  601. // If the uniquename matches more than one features then skip and print 'Ambiguous'
  602. } else if ($no_features > 1) {
  603. fwrite($log, "Ambiguous: ".$first_word." matches more than one feature and is not processed.\n");
  604. continue;
  605. // If the uniquename did not match, skip and print 'Failed'
  606. } else {
  607. fwrite($log, "Failed: ".$first_word."\n");
  608. }
  609. }
  610. // Successfully matched. print 'Succeeded'
  611. if ($feature_id) {
  612. fwrite($log, "Succeeded: ".$first_word." => feature id:".$feature_id);
  613. $featurename_xml = $iteration_tags->asXML();
  614. }
  615. // Insert Iteration_hits into analysisfeatureprop and analysisfeature tables
  616. } else if($iteration_tags->getName() == 'Iteration_hits'){
  617. if ($feature_id) {
  618. // Make sure this iteration doesn't exist in analysisfeatureprop. If it does, update but not insert
  619. $sql = "SELECT analysisfeatureprop_id FROM {analysisfeatureprop} AFP ".
  620. "INNER JOIN analysisfeature AF ON AF.analysisfeature_id = AFP.analysisfeature_id ".
  621. "WHERE feature_id=%d ".
  622. "AND analysis_id=%d ".
  623. "AND type_id=%d ";
  624. $result = db_query($sql, $feature_id, $analysis_id, $type_id);
  625. $analysisfeatureprop = db_fetch_object($result);
  626. $xml_content = "<Iteration>\n".$featurename_xml."\n".$iteration_tags->asXML()."\n</Iteration>";
  627. // If this Iteration_hits already exists, update it
  628. if ($analysisfeatureprop) {
  629. $sql = "UPDATE {analysisfeatureprop} ".
  630. "SET value = '%s' ".
  631. "WHERE analysisfeatureprop_id = %d ";
  632. db_query($sql, $xml_content, $analysisfeatureprop->analysisfeatureprop_id);
  633. fwrite($log, " (Update)\n"); // write to log
  634. // Otherwise, insert the Iteration_hits into analysisfeature and analysisfeatureprop tables
  635. } else {
  636. //------------------------------------------------------
  637. // Insert into analysisfeature table
  638. //------------------------------------------------------
  639. $sql = "INSERT INTO {analysisfeature} (feature_id, analysis_id) ".
  640. "VALUES (%d, %d)";
  641. db_query ($sql, $feature_id, $analysis_id);
  642. // Get the newly inserted analysisfeature_id
  643. $sql = "SELECT analysisfeature_id FROM {analysisfeature} WHERE feature_id = %d AND analysis_id = %d";
  644. $analysisfeature_id = db_result(db_query($sql, $feature_id, $analysis_id));
  645. //------------------------------------------------------
  646. // Insert into analysisfeatureprop table
  647. //------------------------------------------------------
  648. $sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank)".
  649. "VALUES (%d, %d, '%s', %d)";
  650. db_query($sql, $analysisfeature_id, $type_id, $xml_content, '0');
  651. fwrite($log, " (Insert)\n"); // write to log
  652. }
  653. }
  654. }
  655. }
  656. }
  657. }
  658. }
  659. }
  660. $no_file ++;
  661. }
  662. }
  663. tripal_db_set_active ($previous_db); // Use drupal database
  664. }
  665. print "Done.\nSuccessful and failed entries have been saved in the log file:\n $logfile\n";
  666. fwrite($log, "\n");
  667. fclose($log);
  668. return;
  669. }
  /**
   * AJAX callback: send the blast parsing settings stored for one database.
   *
   * Reads the {tripal_analysis_blast} row whose db_id matches and hands its
   * display name, genbank flag and the three hit regular expressions to
   * drupal_json(). When no row exists, every value is sent as NULL instead of
   * reading properties from a non-object.
   *
   * @param $db_id
   *   Value matched against tripal_analysis_blast.db_id.
   */
  function tripal_get_blast_regex ($db_id) {
    $sql = "SELECT * FROM {tripal_analysis_blast} WHERE db_id = %d";
    $settings = db_fetch_object(db_query($sql, $db_id));

    // db_fetch_object() gives back FALSE when the query matched nothing.
    $found = is_object($settings);

    drupal_json(array(
      'name' => $found ? $settings->displayname : NULL,
      'genbank_style' => $found ? $settings->genbank_style : NULL,
      'reg1' => $found ? $settings->regex_hit_id : NULL,
      'reg2' => $found ? $settings->regex_hit_def : NULL,
      'reg3' => $found ? $settings->regex_hit_accession : NULL,
    ));
  }
  /**
   * Implementation of hook_node_info().
   *
   * Declares the one node type this module provides, 'chado_analysis_blast'.
   * The type has neither a visible title nor a body field and is locked.
   *
   * @return
   *   Array keyed by node type name.
   */
  function tripal_analysis_blast_node_info() {
    $blast_type = array(
      'name' => t('Analysis: Blast'),
      'module' => 'chado_analysis_blast',
      'description' => t('A blast analysis from the chado database'),
      'has_title' => FALSE,
      'title_label' => t('Analysis: Blast'),
      'has_body' => FALSE,
      'body_label' => t('Blast Analysis Description'),
      'locked' => TRUE,
    );

    return array('chado_analysis_blast' => $blast_type);
  }
  /**
   * Node form callback for the 'chado_analysis_blast' node type.
   *
   * Merges the elements returned by chado_analysis_form() with a "Blast
   * Settings" fieldset holding the database selector, the xml file path, the
   * parse-job checkbox and the blast parameters.
   *
   * @param $node
   *   The node being created or edited. Its blastdb property may hold either
   *   the database id alone or the packed "db|file|parameters" string.
   *
   * @return
   *   The form array.
   */
  function chado_analysis_blast_form ($node){
    $form = array();
    $form['title'] = array(
      '#type' => 'hidden',
      '#default_value' => $node->title,
    );

    // add in the analysis fields
    // NOTE(review): NULL (not $node) is handed to chado_analysis_form(), as
    // before -- confirm that the generic fields are meant to be built this way.
    foreach (chado_analysis_form(null) as $key => $element) {
      $form[$key] = $element;
    }

    // Blast specific settings. When all three values are still packed into
    // blastdb, unpack them. The limit of 3 keeps any '|' characters that are
    // part of the parameters instead of cutting the parameters short.
    if (isset($node->blastdb) && preg_match("/.*\|.*\|.*/", $node->blastdb)) {
      $prop_values = explode("|", $node->blastdb, 3);
      $node->blastdb = $prop_values[0];
      $node->blastfile = $prop_values[1];
      $node->blastparameters = $prop_values[2];
    }

    $form['blast'] = array(
      '#title' => t('Blast Settings'),
      '#type' => 'fieldset',
      '#description' => t('Specific Settings for Blast Analysis.'),
      '#collapsible' => TRUE,
      '#attributes' => array('id' => 'blast-extra-settings'),
      '#weight' => 11
    );

    // get a list of db from chado for user to choose
    $previous_db = tripal_db_set_active('chado'); // use chado database
    $results = db_query('SELECT db_id, name FROM {db} ORDER BY lower(name)');
    $blastdbs = array();
    while ($db = db_fetch_object($results)) {
      $blastdbs[$db->db_id] = $db->name;
    }
    tripal_db_set_active($previous_db); // back to the drupal database

    $form['db_options'] = array(
      '#type' => 'value',
      '#value' => $blastdbs
    );
    $form['blast']['blastdb'] = array(
      '#title' => t('Database'),
      '#type' => 'select',
      '#description' => t('The database used for the blast analysis.'),
      '#options' => $form['db_options']['#value'],
      '#default_value' => $node->blastdb,
    );
    $form['blast']['blastfile'] = array(
      '#title' => t('Blast xml File: (if you input a directory without the tailing slash, all xml files in the directory will be loaded)'),
      '#type' => 'textfield',
      '#description' => t('The xml output file generated by blast in full path.'),
      '#default_value' => $node->blastfile,
    );
    $form['blast']['blastjob'] = array(
      '#type' => 'checkbox',
      '#title' => t('Submit a job to parse the xml output into analysisfeatureprop table'),
      '#description' => t('Note: features associated with the blast results must '.
                          'exist in chado before parsing the file. Otherwise, blast '.
                          'results that cannot be linked to a feature will be '.
                          'discarded. Also, Triapl Blast module needs to be enabled.'),
      '#default_value' => $node->blastjob
    );
    $form['blast']['blastparameters'] = array(
      '#title' => t('Parameters'),
      '#type' => 'textfield',
      '#description' => t('The parameters for running the blast analysis.'),
      '#default_value' => $node->blastparameters,
    );

    return $form;
  }
  /**
   * Node validation callback for the 'chado_analysis_blast' node type.
   *
   * The validation is reached in three situations:
   *   CASE A: updating a node that exists in both drupal and chado
   *   CASE B: synchronizing a node from chado to drupal
   *   CASE C: inserting a new node that exists in neither drupal nor chado
   * Only CASE C performs a check: the (program, programversion, sourcename)
   * combination must not already be present in the chado analysis table.
   *
   * @param $node
   *   The submitted node.
   * @param $form
   *   The form array (not used by this function).
   */
  function chado_analysis_blast_validate($node, &$form){
    // CASE A: only nodes being updated already have an nid.
    // TO DO: check that the new fields don't yield a non-unique primary key in chado
    if (!empty($node->nid)) {
      return;
    }

    // CASE B: an analysis_id without an nid means the node is being
    // synchronized from chado; nothing to validate.
    if (!empty($node->analysis_id)) {
      return;
    }

    // CASE C: a brand new analysis. The primary key for the chado analysis
    // table is program, programversion, sourcename; make sure it is unused.
    $sql = "SELECT analysis_id ".
           "FROM {analysis} ".
           "WHERE program = '%s' ".
           "AND programversion = '%s' ".
           "AND sourcename = '%s'";
    $previous_db = tripal_db_set_active('chado');
    $analysis_id = db_result(db_query($sql, $node->program, $node->programversion, $node->sourcename));
    tripal_db_set_active($previous_db);

    if (!$analysis_id) {
      return;
    }

    // The key already exists in chado. Check whether it has also been synced
    // with drupal so the user can be pointed in the right direction.
    $sql = "SELECT nid FROM {chado_analysis} ".
           "WHERE analysis_id = %d";
    $node_id = db_result(db_query($sql, $analysis_id));

    // t() gets a literal string with placeholders so that the variable ids do
    // not become part of the translatable text.
    if ($node_id) {
      // Already synced: send the user to the existing node.
      $error = t('This analysis already exists in the chado database (analysis id @analysis_id) and has been synchronized with drupal. See node @nid if you wish to update that analysis.  For a new analysis, please select a unique primary key (primary key consists of sourcename, program and programversion).',
                 array('@analysis_id' => $analysis_id, '@nid' => $node_id));
    }
    else {
      // Not synced yet: tell the user to sync or choose another key.
      $error = t('This analysis already exists in the chado database (analysis id @analysis_id) but has not been synchronized with drupal. See the tripal admin pages to synchronize.  For a new analysis, please select a unique primary key (primary key consists of sourcename, program and programversion).',
                 array('@analysis_id' => $analysis_id));
    }
    form_set_error('sourcename', $error);
  }
  /**
   * Node insert callback for the 'chado_analysis_blast' node type.
   *
   * Adds the analysis to the chado analysis table, stores the blast settings
   * as a "db|file|parameters" analysisprop value, optionally queues a job that
   * parses the blast xml output, links the node to the analysis in
   * {chado_analysis} and finally writes the node title.
   *
   * @param $node
   *   The node that was just created.
   */
  function chado_analysis_blast_insert($node){
    global $user;

    // Create a timestamp (month/day/year) so we can insert it into chado.
    $time = $node->timeexecuted;
    $timestamp = $time['month'].'/'.$time['day'].'/'.$time['year'];

    //---------------------------------------------------
    // First add the item to the chado analysis table
    //---------------------------------------------------
    $previous_db = tripal_db_set_active('chado'); // use chado database
    $sql = "INSERT INTO {analysis} ".
           " (name, description, program, programversion, algorithm, ".
           " sourcename, sourceversion, sourceuri, timeexecuted) ".
           "VALUES ('%s','%s','%s','%s','%s','%s','%s','%s','%s')";
    db_query($sql, $node->analysisname, $node->description,
             $node->program, $node->programversion, $node->algorithm,
             $node->sourcename, $node->sourceversion, $node->sourceuri,
             $timestamp);

    // find the newly entered analysis_id through its unique key
    $sql = "SELECT analysis_id ".
           "FROM {analysis} ".
           "WHERE program = '%s' ".
           "AND programversion = '%s' ".
           "AND sourcename = '%s'";
    $analysis_id = db_result(db_query($sql, $node->program,
                                      $node->programversion, $node->sourcename));

    // Get cvterm_id for 'analysis_blast_settings'
    $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
           "INNER JOIN cv CV ON CV.cv_id = CVT.cv_id ".
           "WHERE CVT.name = 'analysis_blast_settings' ".
           "AND CV.name = 'tripal'";
    $type_id = db_result(db_query($sql));

    //---------------------------------------------------
    // Insert into chado {analysisprop} table
    //---------------------------------------------------
    $sql = "INSERT INTO {analysisprop} (analysis_id, type_id, value) ".
           "VALUES (%d, %d, '%s')";
    $blastsettings = $node->blastdb."|".$node->blastfile."|".$node->blastparameters;
    db_query($sql, $analysis_id, $type_id, $blastsettings);
    tripal_db_set_active($previous_db); // switch back to drupal database

    //---------------------------------------------------
    // Add a job if the user wants to parse the xml output
    //---------------------------------------------------
    if ($node->blastjob) {
      $job_args = array($analysis_id, $node->blastdb, $node->blastfile);
      if (is_readable($node->blastfile)) {
        // Only the last path component is shown in the job name.
        $fname = preg_replace("/.*\/(.*)/", "$1", $node->blastfile);
        tripal_add_job("Parse blast: $fname", 'tripal_analysis_blast',
                       'tripal_analysis_blast_parseXMLFile', $job_args, $user->uid);
      }
      else {
        drupal_set_message("Can not open blast output file. Job not scheduled.");
      }
    }

    // next add the item to the drupal table
    $sql = "INSERT INTO {chado_analysis} (nid, vid, analysis_id) ".
           "VALUES (%d, %d, %d)";
    db_query($sql, $node->nid, $node->vid, $analysis_id);

    // Give the node a title: the analysis name when there is one, otherwise
    // "program (programversion)".
    $record = new stdClass();
    if ($node->analysisname) {
      $record->title = $node->analysisname;
    }
    else {
      $record->title = "$node->program ($node->programversion)";
    }
    $record->nid = $node->nid;
    drupal_write_record('node', $record, 'nid');
    drupal_write_record('node_revisions', $record, 'nid');
  }
  /**
   * Node delete callback for the 'chado_analysis_blast' node type.
   *
   * Removes the drupal rows for this nid/vid ({chado_analysis}, {node},
   * {node_revisions}) and then the chado rows of the linked analysis
   * (analysisfeatureprop, analysisfeature, analysisprop, analysis).
   *
   * @param $node
   *   The node being deleted; nid and vid are read.
   */
  function chado_analysis_blast_delete($node){
    // Before removing, get analysis_id so we can remove it from chado database
    // later
    $sql_drupal = "SELECT analysis_id ".
                  "FROM {chado_analysis} ".
                  "WHERE nid = %d ".
                  "AND vid = %d";
    $analysis_id = db_result(db_query($sql_drupal, $node->nid, $node->vid));

    // Remove data from the {chado_analysis}, {node}, and {node_revisions} tables
    foreach (array('chado_analysis', 'node', 'node_revisions') as $table) {
      $sql_del = "DELETE FROM {".$table."} ".
                 "WHERE nid = %d ".
                 "AND vid = %d";
      db_query($sql_del, $node->nid, $node->vid);
    }

    // Remove from analysisfeatureprop, analysisfeature, analysisprop and
    // analysis tables. The properties are removed with one statement for the
    // whole analysis rather than one query per analysisfeature row.
    $previous_db = tripal_db_set_active('chado');
    db_query("DELETE FROM {analysisfeatureprop} ".
             "WHERE analysisfeature_id IN ".
             "(SELECT analysisfeature_id FROM {analysisfeature} WHERE analysis_id = %d)",
             $analysis_id);
    db_query("DELETE FROM {analysisfeature} WHERE analysis_id = %d", $analysis_id);
    db_query("DELETE FROM {analysisprop} WHERE analysis_id = %d", $analysis_id);
    db_query("DELETE FROM {analysis} WHERE analysis_id = %d", $analysis_id);
    tripal_db_set_active($previous_db);
  }
  /**
   * Node update callback for the 'chado_analysis_blast' node type.
   *
   * Unless a new revision is being created (not handled yet), this rewrites
   * the chado analysis record and its packed "db|file|parameters" analysisprop
   * value, optionally queues a job that parses the blast xml output, and
   * refreshes the node title.
   *
   * @param $node
   *   The node being saved.
   */
  function chado_analysis_blast_update($node){
    global $user;

    // TODO -- decide what to do about revisions
    if (!empty($node->revision)) {
      return;
    }

    // Create a timestamp (month/day/year) so we can insert it into chado.
    $time = $node->timeexecuted;
    $timestamp = $time['month'].'/'.$time['day'].'/'.$time['year'];

    // get the analysis_id for this node. db_result() is used so that a missing
    // row yields FALSE instead of a property read on a non-object.
    $sql = "SELECT analysis_id ".
           "FROM {chado_analysis} ".
           "WHERE vid = %d";
    $analysis_id = db_result(db_query($sql, $node->vid));

    $previous_db = tripal_db_set_active('chado'); // use chado database
    $sql = "UPDATE {analysis} ".
           "SET name = '%s', ".
           " description = '%s', ".
           " program = '%s', ".
           " programversion = '%s', ".
           " algorithm = '%s', ".
           " sourcename = '%s', ".
           " sourceversion = '%s', ".
           " sourceuri = '%s', ".
           " timeexecuted = '%s' ".
           "WHERE analysis_id = %d ";
    db_query($sql, $node->analysisname, $node->description, $node->program,
             $node->programversion, $node->algorithm, $node->sourcename,
             $node->sourceversion, $node->sourceuri, $timestamp, $analysis_id);

    // Get cvterm_id for 'analysis_blast_settings'
    $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
           "INNER JOIN cv CV ON CV.cv_id = CVT.cv_id ".
           "WHERE CVT.name = 'analysis_blast_settings' ".
           "AND CV.name = 'tripal'";
    $type_id = db_result(db_query($sql));

    $sql = "UPDATE {analysisprop} ".
           "SET value = '%s' ".
           "WHERE analysis_id = %d AND type_id = %d";
    $blastsettings = $node->blastdb."|".$node->blastfile."|".$node->blastparameters;
    db_query($sql, $blastsettings, $analysis_id, $type_id);
    tripal_db_set_active($previous_db); // switch back to drupal database

    // Add a job if the user wants to parse the xml output
    if ($node->blastjob) {
      $job_args = array($analysis_id, $node->blastdb, $node->blastfile);
      if (is_readable($node->blastfile)) {
        // Only the last path component is shown in the job name.
        $fname = preg_replace("/.*\/(.*)/", "$1", $node->blastfile);
        tripal_add_job("Parse blast: $fname", 'tripal_analysis_blast',
                       'tripal_analysis_blast_parseXMLFile', $job_args, $user->uid);
      }
      else {
        drupal_set_message("Can not open blast output file. Job not scheduled.");
      }
    }

    // Give the node a title: the analysis name when there is one, otherwise
    // "program (programversion)".
    $record = new stdClass();
    if ($node->analysisname) {
      $record->title = $node->analysisname;
    }
    else {
      $record->title = "$node->program ($node->programversion)";
    }
    $record->nid = $node->nid;
    drupal_write_record('node', $record, 'nid');
    drupal_write_record('node_revisions', $record, 'nid');
  }
  /**
   * Node load callback for the 'chado_analysis_blast' node type.
   *
   * Collects the auxiliary data for the node: the chado analysis record, the
   * number of features linked to the analysis, and the blast settings which
   * are stored in analysisprop as one "db|file|parameters" string.
   *
   * @param $node
   *   The node being loaded; only nid is read.
   *
   * @return
   *   Object with the analysis columns plus featurecount, blastdb, blastfile,
   *   blastparameters and title. When the node is not linked to an analysis
   *   only title is set (to " ()").
   */
  function chado_analysis_blast_load($node){
    // get the analysis_id for this node:
    $sql = "SELECT analysis_id FROM {chado_analysis} WHERE nid = %d";
    $ana_node = db_fetch_object(db_query($sql, $node->nid));

    $additions = new stdClass();
    if ($ana_node) {
      $previous_db = tripal_db_set_active('chado'); // use chado database

      // get analysis information
      $sql = "SELECT analysis_id, name AS analysisname, description, program, ".
             " programversion, algorithm, sourcename, sourceversion, ".
             " sourceuri, timeexecuted ".
             "FROM {analysis} ".
             "WHERE analysis_id = %d";
      $analysis = db_fetch_object(db_query($sql, $ana_node->analysis_id));
      // Keep the empty object when chado has no such analysis, so the
      // assignments below never target FALSE.
      if ($analysis) {
        $additions = $analysis;
      }

      // get number of features assc with this analysis
      $sql = "SELECT count(feature_id) as featurecount ".
             "FROM {analysisfeature} ".
             "WHERE analysis_id = %d";
      $additions->featurecount = db_result(db_query($sql, $ana_node->analysis_id));

      // get cvterm_id for 'analysis_blast_settings'
      $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
             "INNER JOIN cv CV ON CV.cv_id = CVT.cv_id ".
             "WHERE CVT.name = 'analysis_blast_settings' ".
             "AND CV.name = 'tripal'";
      $type_id = db_result(db_query($sql));

      // get analysisprop information
      $sql = "SELECT value FROM {analysisprop} ".
             "WHERE analysis_id = %d ".
             "AND type_id = %d";
      $analysisprop = db_result(db_query($sql, $ana_node->analysis_id, $type_id));

      // Split into at most three pieces: a limit of 1 would hand back the
      // whole string as a single element, and a limit of 3 keeps any '|' that
      // belongs to the parameters. Pad so all three slots always exist.
      $prop_values = array_pad(explode("|", (string) $analysisprop, 3), 3, NULL);
      $additions->blastdb = $prop_values[0];
      $additions->blastfile = $prop_values[1];
      $additions->blastparameters = $prop_values[2];

      tripal_db_set_active($previous_db); // now use drupal database
    }

    // If the analysis has a name, use it as the node title. If not, construct
    // the title as "program (programversion)".
    if (!empty($additions->analysisname)) {
      $additions->title = $additions->analysisname;
    }
    else {
      $program = isset($additions->program) ? $additions->program : '';
      $version = isset($additions->programversion) ? $additions->programversion : '';
      $additions->title = "$program ($version)";
    }
    return $additions;
  }
  /**
   * Node view callback for the 'chado_analysis_blast' node type.
   *
   * Teasers are returned untouched. For a full view the node goes through
   * node_prepare(), an array-valued timeexecuted (as seen when previewing a
   * submitted form) is flattened to "year-month-day", and blast settings that
   * are still packed as "db|file|parameters" in blastdb are unpacked.
   *
   * @param $node
   *   The node to display.
   * @param $teaser
   *   TRUE when only the teaser is requested.
   * @param $page
   *   Not used by this function.
   *
   * @return
   *   The node, prepared for display.
   */
  function chado_analysis_blast_view ($node, $teaser = FALSE, $page = FALSE) {
    if ($teaser) {
      return $node;
    }

    // use drupal's default node view:
    $node = node_prepare($node, $teaser);

    // When previewing a node submitting form, it shows 'Array' instead of
    // correct date format. We need to format the date here
    $time = $node->timeexecuted;
    if (is_array($time)) {
      $node->timeexecuted = $time['year'].'-'.$time['month'].'-'.$time['day'];
    }

    // When viewing a node, we need to reformat the analysisprop since we
    // separate each value with a bar |. The limit of 3 keeps any '|' that is
    // part of the parameters instead of cutting the parameters short.
    if (isset($node->blastdb) && preg_match("/.*\|.*\|.*/", $node->blastdb)) {
      $prop_values = explode("|", $node->blastdb, 3);
      $node->blastdb = $prop_values[0];
      $node->blastfile = $prop_values[1];
      $node->blastparameters = $prop_values[2];
    }

    return $node;
  }
  /**
   * Implementation of hook_perm().
   *
   * Lists the permissions that guard viewing, creating, deleting and editing
   * of chado_analysis_blast content.
   *
   * @return
   *   Array of permission names.
   */
  function tripal_analysis_blast_perm(){
    $permissions = array();
    $permissions[] = 'access chado_analysis_blast content';
    $permissions[] = 'create chado_analysis_blast content';
    $permissions[] = 'delete chado_analysis_blast content';
    $permissions[] = 'edit chado_analysis_blast content';
    return $permissions;
  }
  /**
   * Access callback for the 'chado_analysis_blast' node type.
   *
   * Maps the requested operation onto one of this module's permissions and
   * asks user_access() whether the account holds it.
   *
   * @param $op
   *   'create', 'update', 'delete' or 'view'.
   * @param $node
   *   The node (or node type) the operation applies to; not inspected here.
   * @param $account
   *   The user account to check.
   *
   * @return
   *   The result of user_access() for 'create'; TRUE or FALSE for 'update',
   *   'delete' and 'view'; FALSE for any other operation.
   */
  function chado_analysis_blast_access($op, $node, $account){
    switch ($op) {
      case 'create':
        // The raw user_access() answer is handed back for creation.
        return user_access('create chado_analysis_blast content', $account);

      case 'update':
        $permission = 'edit chado_analysis_blast content';
        break;

      case 'delete':
        $permission = 'delete chado_analysis_blast content';
        break;

      case 'view':
        $permission = 'access chado_analysis_blast content';
        break;

      default:
        return FALSE;
    }

    return user_access($permission, $account) ? TRUE : FALSE;
  }
  1155. /********************************************************************************
  1156. *
  1157. */
  1158. function tripal_analysis_blast_get_result_object($xml_string,$db,$max,$feature_id, $analysis) {
  1159. $blast_object = new stdClass();
  1160. // Get the parser using db_id
  1161. $sql = "SELECT * FROM {tripal_analysis_blast} WHERE db_id = %d";
  1162. $parser = db_fetch_object(db_query($sql, $db->db_id));
  1163. $db_name = $parser->displayname;
  1164. $is_genbank = $parser->genbank_style;
  1165. $regex_hit_id = $parser->regex_hit_id;
  1166. $regex_hit_def = $parser->regex_hit_def;
  1167. $regex_hit_accession = $parser->regex_hit_accession;
  1168. // set default if regular expressions have not been specified
  1169. if(!$regex_hit_id){
  1170. $regex_hit_id = '/^(.*?)\s.*$/';
  1171. } else {
  1172. $regex_hit_id = '/'.$regex_hit_id.'/';
  1173. }
  1174. if(!$regex_hit_def){
  1175. $regex_hit_def = '/^.*?\s(.*)$/';
  1176. } else {
  1177. $regex_hit_def = '/'.$regex_hit_def.'/';
  1178. }
  1179. if(!$regex_hit_accession){
  1180. $regex_hit_accession = '/^(.*?)\s.*$/';
  1181. } else {
  1182. $regex_hit_accession = '/'.$regex_hit_accession.'/';
  1183. }
  1184. // Get analysis information
  1185. $blast_object->analysis = $analysis;
  1186. $blast_object->db = $db;
  1187. if (!$db_name) {
  1188. $blast_object->title = $analysis->name;
  1189. } else {
  1190. $blast_object->title = $db_name;
  1191. }
  1192. // Find node id for the analysis
  1193. $ana_nid = db_result(db_query("SELECT nid FROM {chado_analysis} WHERE analysis_id = %d", $analysis->aid));
  1194. $blast_object->ana_nid = $ana_nid;
  1195. $blast_object->ana_time = $analysis->time;
  1196. $blast_object->ana_name = $analysis->name;
  1197. // Load the file. This XML file should be an extract
  1198. // of the original XML file with only a single iteration.
  1199. // An iteration is essentially all the hits for a single
  1200. // query sequence.
  1201. $xml_output = simplexml_load_string($xml_string);
  1202. $iteration = '';
  1203. // new XML file parser has added the feature name within <Iteration_query-def> tags.
  1204. if ($xml_output->getName() == 'Iteration') {
  1205. foreach ($xml_output->children() as $xml_tag) {
  1206. if ($xml_tag->getName() == 'Iteration_query-def') {
  1207. // Here we show the feature name again to check if we pull the correct data
  1208. $blast_object->xml_tag = $xml_tag;
  1209. } else if ($xml_tag->getName() == 'Iteration_hits') {
  1210. $iteration = $xml_tag;
  1211. }
  1212. }
  1213. // This is for the file parsed by the old parser
  1214. } else {
  1215. $iteration = $xml_output;
  1216. }
  1217. $number_hits = 0;
  1218. foreach($iteration->children() as $hits){
  1219. $number_hits ++;
  1220. }
  1221. // add the links for updating blast info using Ajax
  1222. $blast_object->max = $max;
  1223. $blast_object->number_hits = $number_hits;
  1224. $blast_object->feature_id = $feature_id;
  1225. $hits_array = array();
  1226. $hit_count = 0;
  1227. foreach($iteration->children() as $hits){
  1228. $hsp_array = array();
  1229. $counter = 0;
  1230. foreach($hits->children() as $hit){
  1231. $best_evalue = 0;
  1232. $best_identity = 0;
  1233. $best_len = 0;
  1234. $element_name = $hit->getName();
  1235. if($element_name == 'Hit_id'){
  1236. // if parsing "name, acc, desc" from three tags (1/3)
  1237. if ($is_genbank) {
  1238. $hit_name = $hit;
  1239. }
  1240. } else if($element_name == 'Hit_def'){
  1241. if($is_genbank){
  1242. $description = $hit;
  1243. } else {
  1244. $accession = preg_replace($regex_hit_accession,"$1",$hit);
  1245. $hit_name = preg_replace($regex_hit_id,"$1",$hit);
  1246. $description = preg_replace($regex_hit_def,"$1",$hit);
  1247. }
  1248. } else if($element_name == 'Hit_accession'){
  1249. // if parsing "name, acc, desc" from three tags (3/3)
  1250. if ($is_genbank){
  1251. $accession = $hit;
  1252. }
  1253. // now run through each HSP for this hit
  1254. } else if($element_name == 'Hit_hsps'){
  1255. foreach($hit->children() as $hsp){
  1256. foreach($hsp->children() as $hsp_info){
  1257. $element_name = $hsp_info->getName();
  1258. if($element_name == 'Hsp_num'){
  1259. $hsp_num = $hsp_info;
  1260. }
  1261. if($element_name == 'Hsp_bit-score'){
  1262. $hsp_bit_score = $hsp_info;
  1263. }
  1264. if($element_name == 'Hsp_score'){
  1265. $hsp_score = $hsp_info;
  1266. }
  1267. if($element_name == 'Hsp_evalue'){
  1268. $hsp_evalue = $hsp_info;
  1269. // use the first evalue for this set of HSPs
  1270. // as the best evalue. This get's shown as
  1271. // info for the overall match.
  1272. if(!$best_evalue){
  1273. $best_evalue = $hsp_evalue;
  1274. }
  1275. }
  1276. if($element_name == 'Hsp_query-from'){
  1277. $hsp_query_from = $hsp_info;
  1278. }
  1279. if($element_name == 'Hsp_query-to'){
  1280. $hsp_query_to = $hsp_info;
  1281. }
  1282. if($element_name == 'Hsp_hit-from'){
  1283. $hsp_hit_from = $hsp_info;
  1284. }
  1285. if($element_name == 'Hsp_hit-to'){
  1286. $hsp_hit_to = $hsp_info;
  1287. }
  1288. if($element_name == 'Hsp_query-frame'){
  1289. $hsp_query_frame = $hsp_info;
  1290. }
  1291. if($element_name == 'Hsp_identity'){
  1292. $hsp_identity = $hsp_info;
  1293. // use the first evalue for this set of HSPs
  1294. // as the best evalue. This get's shown as
  1295. // info for the overall match.
  1296. if(!$best_identity){
  1297. $best_identity = $hsp_identity;
  1298. }
  1299. }
  1300. if($element_name == 'Hsp_positive'){
  1301. $hsp_positive = $hsp_info;
  1302. }
  1303. if($element_name == 'Hsp_align-len'){
  1304. $hsp_align_len = $hsp_info;
  1305. // use the first alignment length for this set of HSPs
  1306. // as the best length. This is used to compute the
  1307. // percent identity for the overall match.
  1308. if(!$best_len){
  1309. $best_len = $hsp_align_len;
  1310. }
  1311. }
  1312. if($element_name == 'Hsp_qseq'){
  1313. $hsp_qseq = $hsp_info;
  1314. }
  1315. if($element_name == 'Hsp_hseq'){
  1316. $hsp_hseq = $hsp_info;
  1317. }
  1318. if($element_name == 'Hsp_midline'){
  1319. $hsp_midline = $hsp_info;
  1320. }
  1321. }
  1322. $hsp_content = array();
  1323. $hsp_content['hsp_num'] = $hsp_num;
  1324. $hsp_content['bit_score'] = $hsp_bit_score;
  1325. $hsp_content['score'] = $hsp_score;
  1326. $hsp_content['evalue'] = $hsp_evalue;
  1327. $hsp_content['query_frame'] = $hsp_query_frame;
  1328. $hsp_content['qseq'] = $hsp_qseq;
  1329. $hsp_content['midline'] = $hsp_midline;
  1330. $hsp_content['hseq'] = $hsp_hseq;
  1331. $hsp_content['hit_from'] = $hsp_hit_from;
  1332. $hsp_content['hit_to'] = $hsp_hit_to;
  1333. $hsp_content['identity'] = $hsp_identity;
  1334. $hsp_content['align_len'] = $hsp_align_len;
  1335. $hsp_content['positive'] = $hsp_positive;
  1336. $hsp_content['query_from'] = $hsp_query_from;
  1337. $hsp_content['query_to'] = $hsp_query_to;
  1338. $hsp_array[$counter] = $hsp_content;
  1339. $counter ++;
  1340. }
  1341. }
  1342. }
  1343. $arrowr_url = url(drupal_get_path('theme', 'tripal')."/images/arrow_r.png");
  1344. $hits_array[$hit_count]['arrowr_url'] = $arrowr_url;
  1345. $hits_array[$hit_count]['accession'] = $accession;
  1346. $hits_array[$hit_count]['hit_name'] = $hit_name;
  1347. if($accession && $db->urlprefix){
  1348. $hits_array[$hit_count]['hit_url'] = "$db->urlprefix$accession";
  1349. } else {
  1350. // Test if this is another feature in the database
  1351. $sql = "SELECT feature_id FROM {feature} WHERE uniquename = '%s'";
  1352. $previous_db = db_set_active('chado');
  1353. $hit_feature_id = db_result(db_query($sql, $hit_name));
  1354. db_set_active($previous_db);
  1355. // If it is, add link to that feature
  1356. if ($hit_feature_id) {
  1357. $hits_array[$hit_count]['hit_url'] = "ID$hit_feature_id";
  1358. }
  1359. }
  1360. $hits_array[$hit_count]['best_evalue'] = $best_evalue;
  1361. $percent_identity = number_format($best_identity/$best_len*100, 2);
  1362. $hits_array[$hit_count]['percent_identity'] = $percent_identity;
  1363. $hits_array[$hit_count]['description'] = $description;
  1364. $hits_array[$hit_count]['hsp'] = $hsp_array;
  1365. $hit_count ++;
  1366. // if we've hit the maximum number of hits then return
  1367. if($max > 0 && $hit_count >= $max){
  1368. break;
  1369. }
  1370. }
  1371. $blast_object->hits_array = $hits_array;
  1372. return $blast_object;
  1373. }