tripal_analysis_blast.module 60 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580
  1. <?php
  2. // $Id:
  3. // Copyright 2009 Clemson University
  4. /*******************************************************************************
  5. * Tripal Blast Result lets users show/hide blast results associated
  6. * with a tripal feature
  7. ******************************************************************************/
  8. function tripal_analysis_blast_init(){
  9. // Add javascript and style sheet
  10. drupal_add_css(drupal_get_path('theme', 'tripal').
  11. '/css/tripal_analysis_blast.css');
  12. drupal_add_js(drupal_get_path('theme', 'tripal').
  13. '/js/tripal_analysis_blast.js');
  14. }
  15. /*******************************************************************************
  16. * tripal_analysis_blast_menu()
  17. * HOOK: Implementation of hook_menu()
  18. * Entry points and paths of the module
  19. */
  20. function tripal_analysis_blast_menu() {
  21. //Show top 10/25/all blast results for ajax calls
  22. $items['tripal_top_blast'] = array(
  23. 'path' => 'top_blast',
  24. 'title' => t('Blast Hits'),
  25. 'page callback' => 'tripal_get_blast_results',
  26. 'page arguments' => array(1,2,3,'1'),
  27. 'access arguments' => array('access content'),
  28. 'type' => MENU_CALLBACK
  29. );
  30. // Show regular expressions for selected database in Blast admin page
  31. $items['admin/tripal/tripal_blast_regex'] = array(
  32. 'title' => t('Blast Regex'),
  33. 'page callback' => 'tripal_get_blast_regex',
  34. 'page arguments' => array(3),
  35. 'access arguments' => array('administer site configuration'),
  36. 'type' => MENU_CALLBACK
  37. );
  38. return $items;
  39. }
  40. /*******************************************************************************
  41. * tripal_analysis_blast_nodeapi()
  42. * HOOK: Implementation of hook_nodeapi()
  43. * Display blast results for allowed node types
  44. */
  45. function tripal_analysis_blast_nodeapi(&$node, $op, $teaser, $page) {
  46. switch ($op) {
  47. case 'view':
  48. // Find out which node types for showing the blast
  49. $types_to_show = variable_get('tripal_analysis_blast_setting',
  50. array('chado_feature'));
  51. // Abort if this node is not one of the types we should show.
  52. if (!in_array($node->type, $types_to_show, TRUE)) {
  53. break;
  54. }
  55. // Add blast to the content item if it's not a teaser
  56. if (!$teaser && $node->feature->feature_id) {
  57. if($node->build_mode == NODE_BUILD_SEARCH_INDEX){
  58. $node->content['tripal_analysis_blast_index_version'] = array(
  59. '#value' => theme('tripal_analysis_blast_results_index_version',$node),
  60. '#weight' => 8,
  61. );
  62. } else {
  63. // Show blast result if not at teaser view
  64. $node->content['tripal_analysis_blast_form'] = array(
  65. '#value' => theme('tripal_analysis_blast_results', $node),
  66. '#weight' => 8
  67. );
  68. }
  69. }
  70. }
  71. }
  72. /************************************************************************
  73. * We need to let drupal know about our theme functions and their arguments.
  74. * We create theme functions to allow users of the module to customize the
  75. * look and feel of the output generated in this module
  76. */
  77. function tripal_analysis_blast_theme () {
  78. return array(
  79. 'tripal_analysis_blast_results_index_version' => array (
  80. 'arguments' => array('node'),
  81. ),
  82. 'tripal_analysis_blast_results' => array(
  83. 'arguments' => array('node'=> null),
  84. 'template' => 'tripal_analysis_blast_results',
  85. )
  86. );
  87. }
  88. /*******************************************************************************
  89. *
  90. */
  91. function tripal_analysis_blast_preprocess_tripal_analysis_blast_results(&$variables){
  92. $feature = $variables['node']->feature;
  93. $blast_object = tripal_get_blast_results($feature->feature_id, 0, 10, 0);
  94. $variables['blast_object'] = $blast_object;
  95. }
  96. /*******************************************************************************
  97. * Prepare blast result for the feature shown on the page
  98. */
  99. function theme_tripal_analysis_blast_results_index_version ($node) {
  100. $feature = $node->feature;
  101. $content = tripal_get_blast_results_index_version($feature->feature_id);
  102. return $content;
  103. }
  104. /*******************************************************************************
  105. * tripal_get_blast_results()
  106. * Get blast result from featureprop table for the feature
  107. */
  108. function tripal_get_blast_results($feature_id, $db_id, $max,$ajax){
  109. // Get cvterm_id for 'analysis_blast_output_iteration_hits' which is required
  110. // for inserting into the analysisfeatureprop table
  111. $previous_db = tripal_db_set_active('chado');
  112. $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
  113. "INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
  114. "WHERE CVT.name = 'analysis_blast_output_iteration_hits' ".
  115. "AND CV.name = 'tripal'";
  116. $type_id = db_result(db_query($sql));
  117. // Get xml string from analysisfeatureprop value column, get db_id from analysisprop value column
  118. // , and get analysis_id from analysisfeature table
  119. $sql = "SELECT AP.value AS apvalue, AFP.value AS afpvalue, AF.analysis_id AS aid
  120. FROM {analysisfeatureprop} AFP
  121. INNER JOIN analysisfeature AF ON AF.analysisfeature_id = AFP.analysisfeature_id
  122. INNER JOIN analysisprop AP ON AP.analysis_id = AF.analysis_id
  123. WHERE feature_id = %d
  124. AND AFP.type_id = %d
  125. AND AP.value like '%|%' ";
  126. $result = db_query($sql, $feature_id, $type_id);
  127. tripal_db_set_active($previous_db);
  128. // get the HTML content for viewing each of the XML file
  129. $blast_obj_array = array ();
  130. $blast_obj_counter = 0;
  131. while ($analysisfeatureprop = db_fetch_object($result)) {
  132. // get db_id
  133. $blastsettings = explode("|", $analysisfeatureprop->apvalue);
  134. $att_db_id = $blastsettings [0];
  135. // get analysis name and date
  136. $previous_db = tripal_db_set_active('chado');
  137. $sql = "SELECT analysis_id AS aid, name, to_char(timeexecuted, 'MM-DD-YYYY') AS time
  138. FROM {analysis} WHERE analysis_id = %d";
  139. $analysis = db_fetch_object(db_query($sql, $analysisfeatureprop->aid));
  140. tripal_db_set_active($previous_db);
  141. // If not called by ajax, go through all blast hits
  142. if ($ajax == 0) {
  143. // Get db object using the db_id
  144. $previous_db = tripal_db_set_active('chado');
  145. $sql = "SELECT * FROM {db} WHERE db_id=%d";
  146. $db = db_fetch_object(db_query($sql, $att_db_id));
  147. tripal_db_set_active($previous_db);
  148. $blast_obj = tripal_analysis_blast_get_result_object($analysisfeatureprop->afpvalue,$db,$max,$feature_id, $analysis);
  149. $blast_obj_array [$blast_obj_counter] = $blast_obj;
  150. $blast_obj_counter ++;
  151. // Otherwise, only update expandable box the user has clicked on
  152. } else {
  153. if ($att_db_id == $db_id) {
  154. // Get db object using the db_id
  155. $previous_db = tripal_db_set_active('chado');
  156. $sql = "SELECT * FROM {db} WHERE db_id=%d";
  157. $db = db_fetch_object(db_query($sql, $att_db_id));
  158. tripal_db_set_active($previous_db);
  159. $blast_obj = tripal_analysis_blast_get_result_object($analysisfeatureprop->afpvalue,$db,$max,$feature_id, $analysis);
  160. $blast_obj_array [$blast_obj_counter] = $blast_obj;
  161. $blast_obj_counter ++;
  162. }
  163. }
  164. }
  165. // since this function provides output for addition into
  166. // a feature page, as well as an AJAX refresh of content
  167. // within the blast hits we need to setup the return
  168. // different depending on the request type
  169. if($ajax){
  170. $content = theme('tripal_analysis_blast_results', $node);drupal_set_message("CONTENT:".$content);
  171. drupal_json(array('update' => $content));
  172. } else {
  173. return $blast_obj_array;
  174. }
  175. }
  176. /*******************************************************************************
  177. * Scanning the file folder for blast results and prepare content for indexing
  178. */
  179. function tripal_get_blast_results_index_version ($feature_id){
  180. // Get cvterm_id for 'analysis_blast_output_iteration_hits' which is required
  181. // for inserting into the analysisfeatureprop table
  182. $previous_db = tripal_db_set_active('chado');
  183. $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
  184. "INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
  185. "WHERE CVT.name = 'analysis_blast_output_iteration_hits' ".
  186. "AND CV.name = 'tripal'";
  187. $type_id = db_result(db_query($sql));
  188. // Get xml string from analysisfeatureprop value column, get db_id from analysisprop value column
  189. // , and get analysis_id from analysisfeature table
  190. $sql = "SELECT AP.value AS apvalue, AFP.value AS afpvalue, AF.analysis_id AS aid
  191. FROM {analysisfeatureprop} AFP
  192. INNER JOIN analysisfeature AF ON AF.analysisfeature_id = AFP.analysisfeature_id
  193. INNER JOIN analysisprop AP ON AP.analysis_id = AF.analysis_id
  194. WHERE feature_id = %d
  195. AND AFP.type_id = %d ";
  196. $result = db_query($sql, $feature_id, $type_id);
  197. tripal_db_set_active($previous_db);
  198. // get the HTML content for viewing each of the XML file
  199. while ($analysisfeatureprop = db_fetch_object($result)) {
  200. // get analysis name and date
  201. $previous_db = tripal_db_set_active('chado');
  202. $sql = "SELECT analysis_id AS aid, name, to_char(timeexecuted, 'MM-DD-YYYY') AS time
  203. FROM {analysis} WHERE analysis_id = %d";
  204. $analysis = db_fetch_object(db_query($sql, $analysisfeatureprop->aid));
  205. tripal_db_set_active($previous_db);
  206. $blastsettings = explode("|", $analysisfeatureprop->apvalue);
  207. $att_db_id = $blastsettings [0];
  208. // Get db object using the db_id
  209. $previous_db = tripal_db_set_active('chado');
  210. $sql = "SELECT * FROM {db} WHERE db_id=%d";
  211. $db = db_fetch_object(db_query($sql, $att_db_id));
  212. tripal_db_set_active($previous_db);
  213. // Only index best 10 hits because the default page only shows 10 blast results
  214. $max = 10;
  215. $content .= parse_NCBI_Blast_XML_index_version($analysisfeatureprop->afpvalue,$db,$max,$feature_id,$ajax, $analysis);
  216. }
  217. return $content;
  218. }
  219. /*******************************************************************************
  220. * Parse NCBI Blast results for indexing so that user can use blast results to
  221. * find corresponding features
  222. */
  223. function parse_NCBI_Blast_XML_index_version($xml_string,$db,$feature_id) {
  224. // Get the parser using db_id
  225. $sql = "SELECT * FROM {tripal_analysis_blast} WHERE db_id = %d";
  226. $parser = db_fetch_object(db_query($sql, $db->db_id));
  227. $db_name = $parser->displayname;
  228. $is_genbank = $parser->genbank_style;
  229. $regex_hit_id = $parser->regex_hit_id;
  230. $regex_hit_def = $parser->regex_hit_def;
  231. $regex_hit_accession = $parser->regex_hit_accession;
  232. // set default if regular expressions have not been specified
  233. if(!$regex_hit_id){
  234. $regex_hit_id = '/^(.*?)\s.*$/';
  235. } else {
  236. $regex_hit_id = '/'.$regex_hit_id.'/';
  237. }
  238. if(!$regex_hit_def){
  239. $regex_hit_def = '/^.*?\s(.*)$/';
  240. } else {
  241. $regex_hit_def = '/'.$regex_hit_def.'/';
  242. }
  243. if(!$regex_hit_accession){
  244. $regex_hit_accession = '/^(.*?)\s.*$/';
  245. } else {
  246. $regex_hit_accession = '/'.$regex_hit_accession.'/';
  247. }
  248. $html_out .= "<h3>$db_name</h3>";
  249. // Load the file. This XML file should be an extract
  250. // of the original XML file with only a single iteration.
  251. // An iteration is essentially all the hits for a single
  252. // query sequence.
  253. $xml_output = simplexml_load_string($xml_string);
  254. $iteration = '';
  255. // new XML file parser has added the feature name within <Iteration_query-def> tags.
  256. if ($xml_output->getName() == 'Iteration') {
  257. foreach ($xml_output->children() as $xml_tag) {
  258. if ($xml_tag->getName() == 'Iteration_query-def') {
  259. // Here we show the feature name again to check if we pull the correct data
  260. $html_out .= "Query: $xml_tag<br>";
  261. } else if ($xml_tag->getName() == 'Iteration_hits') {
  262. $iteration = $xml_tag;
  263. }
  264. }
  265. // This is for the file parsed by the old parser
  266. } else {
  267. $iteration = $xml_output;
  268. }
  269. // now run through the blast hits/hsps of this iteration
  270. // and generate the rows of the table
  271. foreach($iteration->children() as $hits){
  272. $best_evalue = 0;
  273. foreach($hits->children() as $hit){
  274. $best_evalue = 0;
  275. $element_name = $hit->getName();
  276. if($element_name == 'Hit_id'){
  277. // if parsing "name, acc, desc" from three tags (1/3)
  278. if ($is_genbank) {
  279. $hit_name = $hit;
  280. }
  281. } else if($element_name == 'Hit_def'){
  282. if($is_genbank){
  283. $description = $hit;
  284. } else {
  285. $accession = preg_replace($regex_hit_accession,"$1",$hit);
  286. $hit_name = preg_replace($regex_hit_id,"$1",$hit);
  287. $description = preg_replace($regex_hit_def,"$1",$hit);
  288. }
  289. } else if($element_name == 'Hit_accession'){
  290. // if parsing "name, acc, desc" from three tags (3/3)
  291. if ($is_genbank){
  292. $accession = $hit;
  293. }
  294. // now run through each HSP for this hit
  295. }
  296. }
  297. $html_out .= "<p>$hit_name<br>";
  298. $html_out .= "$accession<br>";
  299. $html_out .= "<b>$description</b></br>";
  300. $hsp_html_out = '';
  301. }
  302. return $html_out;
  303. }
  304. /*******************************************************************************
  305. * Tripal Blast administrative setting form. This function is called by
  306. * tripal_analysis module which asks for an admin form to show on the page
  307. */
  308. function tripal_analysis_blast_get_settings() {
  309. // Get an array of node types with internal names as keys
  310. $options = node_get_types('names');
  311. // Add 'chado_feature' to allowed content types for showing blast results
  312. $allowedoptions ['chado_feature'] = "Show blast results on feature pages";
  313. $form['description'] = array(
  314. '#type' => 'item',
  315. '#value' => t("Most chado features were analyzed by blast against major sequence databases. This option allows user to display the blast analysis results. Please read user manual for storage and display of blast files. Check the box to enable the analysis results. Uncheck to disable it."),
  316. '#weight' => 0,
  317. );
  318. $form['tripal_analysis_blast_setting'] = array(
  319. '#type' => 'checkboxes',
  320. '#options' => $allowedoptions,
  321. '#default_value' => variable_get('tripal_analysis_blast_setting',
  322. array('chado_feature')),
  323. );
  324. $form['blast_parser'] = array(
  325. '#title' => t('Blast Parser Settings'),
  326. '#type' => 'fieldset',
  327. '#description' => t('Configure parsers for showing blast results. Each database is '.
  328. 'allowed to have one xml parser.'),
  329. '#weight' => 10
  330. );
  331. $previous_db = tripal_db_set_active('chado'); // use chado database
  332. // get a list of db from chado for user to choose
  333. $sql = 'SELECT db_id, name FROM {db} ORDER BY lower(name)';
  334. $results = db_query ($sql);
  335. $blastdbs = array();
  336. while ($db = db_fetch_object($results)){
  337. $blastdbs[$db->db_id] = $db->name;
  338. }
  339. $form['db_options'] = array(
  340. '#type' => 'value',
  341. '#value' => $blastdbs
  342. );
  343. $form['blast_parser']['blastdb'] = array(
  344. '#title' => t('Database'),
  345. '#type' => 'select',
  346. '#description' => t('The database used for the blast analysis.'),
  347. '#options' => $form['db_options']['#value'],
  348. '#attributes' => array(
  349. 'onChange' => "return tripal_update_regex(this)",
  350. )
  351. );
  352. $form['blast_parser']['displayname'] = array(
  353. '#title' => t('Title for the blast analysis'),
  354. '#type' => 'textfield',
  355. );
  356. $form['blast_parser']['gb_style_parser'] = array(
  357. '#title' => t('Use Genebank style parser. This will clear all regular expression settings for the selected database.'),
  358. '#type' => 'checkbox',
  359. '#attributes' => array(
  360. 'onClick' => "return tripal_set_genbank_style(this)",
  361. )
  362. );
  363. $form['blast_parser']['hit_id'] = array(
  364. '#title' => t('Regular expression for Hit Name'),
  365. '#type' => 'textfield',
  366. );
  367. $form['blast_parser']['hit_def'] = array(
  368. '#title' => t('Regular expression for Hit Description'),
  369. '#type' => 'textfield',
  370. );
  371. $form['blast_parser']['hit_accession'] = array(
  372. '#title' => t('Regular expression for Hit Accession'),
  373. '#type' => 'textfield',
  374. );
  375. $form['blast_parser']['button'] = array(
  376. '#type' => 'submit',
  377. '#value' => t('Save settings')
  378. );
  379. tripal_db_set_active($previous_db); // use drupal database
  380. $settings->form = $form;
  381. $settings->title = "Tripal Blast";
  382. return $settings;
  383. }
  384. /*******************************************************************************
  385. * Parse Blast XML Output file into analysisfeatureprop table
  386. */
  387. function tripal_analysis_blast_parseXMLFile ($analysis_id, $blastdb, $blastfile, $job_id) {
  388. // Prepare log
  389. $filename = preg_replace("/.*\/(.*)/", "$1", $blastfile);
  390. $logfile = file_directory_path() . "/tripal/tripal_analysis_blast/load_$filename.log";
  391. $log = fopen($logfile, 'a'); // append parsing results to log file
  392. // If user input a file (e.g. blast.xml)
  393. if (is_file($blastfile)) {
  394. // Parsing started
  395. print "Parsing File:".$blastfile." ...\n";
  396. fwrite($log, date("D M j G:i:s Y").". Loading $blastfile\n");
  397. // Get cvterm_id for 'analysis_blast_output_iteration_hits' which is required
  398. // for inserting into the analysisfeatureprop table
  399. $previous_db = tripal_db_set_active('chado'); // use chado database
  400. $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
  401. "INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
  402. "WHERE CVT.name = 'analysis_blast_output_iteration_hits' ".
  403. "AND CV.name = 'tripal'";
  404. $type_id = db_result(db_query($sql));
  405. // Load the XML file.
  406. $blastoutput = simplexml_load_file($blastfile);
  407. $no_iterations = 0;
  408. foreach($blastoutput->children() as $tmp) {
  409. if ($tmp->getName() == 'BlastOutput_iterations') {
  410. foreach($tmp->children() as $itr) {
  411. if ($itr->getName() == 'Iteration') {
  412. $no_iterations ++;
  413. }
  414. }
  415. }
  416. }
  417. print "$no_iterations iterations to be processed.\n";
  418. $interval = intval($no_iterations * 0.01);
  419. $idx_iterations = 0;
  420. foreach ($blastoutput->children() as $blastoutput_tags) {
  421. if ($blastoutput_tags->getName() == 'BlastOutput_iterations') {
  422. foreach($blastoutput_tags->children() as $iterations) {
  423. if ($iterations->getName() == 'Iteration') {
  424. // Set job status
  425. $idx_iterations ++;
  426. if ($idx_iterations % $interval == 0) {
  427. $percentage = (int) ($idx_iterations / $no_iterations * 100);
  428. tripal_db_set_active($previous_db);
  429. tripal_job_set_progress($job_id, $percentage);
  430. $previous_db = tripal_db_set_active('chado');
  431. print $percentage."% ";
  432. }
  433. // now run through the blast hits/hsps of this iteration
  434. // and generate the rows of the table
  435. $feature_id = 0;
  436. foreach($iterations->children() as $iteration_tags) {
  437. // Match chado feature uniquename with <Iteration_query-def>
  438. // and get the feature_id
  439. $featurenaem_xml = '';
  440. if($iteration_tags->getName() == 'Iteration_query-def'){
  441. // If the Iteration_query-def in the format of "feature_id|uniquename"
  442. // get feature_id from it directly
  443. if (preg_match("/^(\d+)\|.+/", $iteration_tags, $matches)) {
  444. $feature_id = $matches[1];
  445. // If not in above format, try to match <Iteration_query-def> to feature's uniquename
  446. } else {
  447. // treat the first word of <Iteration_query-def> as uniquename
  448. $first_word = $iteration_tags;
  449. if (preg_match('/^(.*?)\s.*$/', $iteration_tags, $matches)) {
  450. $first_word = $matches[1];
  451. }
  452. // Find out how many features match this uniquename
  453. $sql = "SELECT count(feature_id) FROM {feature} ".
  454. "WHERE uniquename = '%s' ";
  455. $no_features = db_result(db_query($sql, $first_word));
  456. // If there is only one match, get the feature_id
  457. if ($no_features == 1) {
  458. $sql = "SELECT feature_id FROM {feature} ".
  459. "WHERE uniquename = '%s' ";
  460. $feature_id = db_result(db_query($sql, $first_word));
  461. // If the uniquename matches more than one features then skip and print 'Ambiguous'
  462. } else if ($no_features > 1) {
  463. fwrite($log, "Ambiguous: ".$first_word." matches more than one feature and is not processed.\n");
  464. continue;
  465. // If the uniquename did not match, skip and print 'Failed'
  466. } else {
  467. fwrite($log, "Failed: ".$first_word."\n");
  468. }
  469. }
  470. // Successfully matched. print 'Succeeded'
  471. if ($feature_id) {
  472. fwrite($log, "Succeeded: ".$first_word." => feature id:".$feature_id);
  473. $featurename_xml = $iteration_tags->asXML();
  474. }
  475. // Insert Iteration_hits into analysisfeatureprop and analysisfeature tables
  476. } else if($iteration_tags->getName() == 'Iteration_hits'){
  477. if ($feature_id) {
  478. // Make sure this iteration doesn't exist in analysisfeatureprop. If it does, update but not insert
  479. $sql = "SELECT analysisfeatureprop_id FROM {analysisfeatureprop} AFP ".
  480. "INNER JOIN analysisfeature AF ON AF.analysisfeature_id = AFP.analysisfeature_id ".
  481. "WHERE feature_id=%d ".
  482. "AND analysis_id=%d ".
  483. "AND type_id=%d ";
  484. $result = db_query($sql, $feature_id, $analysis_id, $type_id);
  485. $analysisfeatureprop = db_fetch_object($result);
  486. $xml_content = "<Iteration>\n".$featurename_xml."\n".$iteration_tags->asXML()."\n</Iteration>";
  487. // If this Iteration_hits already exists, update it
  488. if ($analysisfeatureprop) {
  489. $sql = "UPDATE {analysisfeatureprop} ".
  490. "SET value = '%s' ".
  491. "WHERE analysisfeatureprop_id = %d ";
  492. db_query($sql, $xml_content, $analysisfeatureprop->analysisfeatureprop_id);
  493. fwrite($log, " (Update)\n"); // write to log
  494. // Otherwise, insert the Iteration_hits into analysisfeature and analysisfeatureprop tables
  495. } else {
  496. //------------------------------------------------------
  497. // Insert into analysisfeature table
  498. //------------------------------------------------------
  499. $sql = "INSERT INTO {analysisfeature} (feature_id, analysis_id) ".
  500. "VALUES (%d, %d)";
  501. db_query ($sql, $feature_id, $analysis_id);
  502. // Get the newly inserted analysisfeature_id
  503. $sql = "SELECT analysisfeature_id FROM {analysisfeature} WHERE feature_id = %d AND analysis_id = %d";
  504. $analysisfeature_id = db_result(db_query($sql, $feature_id, $analysis_id));
  505. //------------------------------------------------------
  506. // Insert into analysisfeatureprop table
  507. //------------------------------------------------------
  508. $sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank)".
  509. "VALUES (%d, %d, '%s', %d)";
  510. db_query($sql, $analysisfeature_id, $type_id, $xml_content, '0');
  511. fwrite($log, " (Insert)\n"); // write to log
  512. }
  513. }
  514. }
  515. }
  516. }
  517. }
  518. }
  519. }
  520. tripal_db_set_active ($previous_db); // Use drupal database
  521. // Otherwise, $blastfile is a directory. Iterate through all xml files in it
  522. } else {
  523. $dir_handle = @opendir($blastfile) or die("Unable to open $blastfile");
  524. $pattern = sql_regcase($blastfile . "/*.XML");
  525. $total_files = count(glob($pattern));
  526. print "$total_files file(s) to be parsed.\n";
  527. $interval = intval($total_files * 0.01);
  528. $no_file = 0;
  529. // Get cvterm_id for 'analysis_blast_output_iteration_hits' which is required
  530. // for inserting into the analysisfeatureprop table
  531. $previous_db = tripal_db_set_active('chado'); // use chado database
  532. $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
  533. "INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
  534. "WHERE CVT.name = 'analysis_blast_output_iteration_hits' ".
  535. "AND CV.name = 'tripal'";
  536. $type_id = db_result(db_query($sql));
  537. // Parsing all files in the directory
  538. while ($file = readdir($dir_handle)) {
  539. if(preg_match("/^.*\.XML/i",$file)){
  540. // Set job status
  541. if ($no_file % $interval == 0) {
  542. $percentage = (int) ($no_file / $total_files * 100);
  543. tripal_db_set_active($previous_db);
  544. tripal_job_set_progress($job_id, $percentage);
  545. $previous_db = tripal_db_set_active('chado');
  546. print $percentage."% ";
  547. }
  548. // Parsing started
  549. print "Parsing File:".$file.". ";
  550. fwrite($log, date("D M j G:i:s Y").". Loading $file\n");
  551. // Load the XML file.
  552. $blastoutput = simplexml_load_file($blastfile."/".$file);
  553. $no_iterations = 0;
  554. foreach($blastoutput->children() as $tmp) {
  555. if ($tmp->getName() == 'BlastOutput_iterations') {
  556. foreach($tmp->children() as $itr) {
  557. if ($itr->getName() == 'Iteration') {
  558. $no_iterations ++;
  559. }
  560. }
  561. }
  562. }
  563. print "$no_iterations iterations to be processed.\n";
  564. foreach ($blastoutput->children() as $blastoutput_tags) {
  565. if ($blastoutput_tags->getName() == 'BlastOutput_iterations') {
  566. foreach($blastoutput_tags->children() as $iterations) {
  567. if ($iterations->getName() == 'Iteration') {
  568. // now run through the blast hits/hsps of this iteration
  569. // and generate the rows of the table
  570. $feature_id = 0;
  571. foreach($iterations->children() as $iteration_tags) {
  572. // Match chado feature uniquename with <Iteration_query-def>
  573. // and get the feature_id
  574. $featurenaem_xml = '';
  575. if($iteration_tags->getName() == 'Iteration_query-def'){
  576. // If the Iteration_query-def in the format of "feature_id|uniquename"
  577. // get feature_id from it directly
  578. if (preg_match("/^(\d+)\|.+/", $iteration_tags, $matches)) {
  579. $feature_id = $matches[1];
  580. // If not in above format, try to match <Iteration_query-def> to feature's uniquename
  581. } else {
  582. // treat the first word of <Iteration_query-def> as uniquename
  583. $first_word = $iteration_tags;
  584. if (preg_match('/^(.*?)\s.*$/', $iteration_tags, $matches)) {
  585. $first_word = $matches[1];
  586. }
  587. // Find out how many features match this uniquename
  588. $sql = "SELECT count(feature_id) FROM {feature} ".
  589. "WHERE uniquename = '%s' ";
  590. $no_features = db_result(db_query($sql, $first_word));
  591. // If there is only one match, get the feature_id
  592. if ($no_features == 1) {
  593. $sql = "SELECT feature_id FROM {feature} ".
  594. "WHERE uniquename = '%s' ";
  595. $feature_id = db_result(db_query($sql, $first_word));
  596. // If the uniquename matches more than one features then skip and print 'Ambiguous'
  597. } else if ($no_features > 1) {
  598. fwrite($log, "Ambiguous: ".$first_word." matches more than one feature and is not processed.\n");
  599. continue;
  600. // If the uniquename did not match, skip and print 'Failed'
  601. } else {
  602. fwrite($log, "Failed: ".$first_word."\n");
  603. }
  604. }
  605. // Successfully matched. print 'Succeeded'
  606. if ($feature_id) {
  607. fwrite($log, "Succeeded: ".$first_word." => feature id:".$feature_id);
  608. $featurename_xml = $iteration_tags->asXML();
  609. }
  610. // Insert Iteration_hits into analysisfeatureprop and analysisfeature tables
  611. } else if($iteration_tags->getName() == 'Iteration_hits'){
  612. if ($feature_id) {
  613. // Make sure this iteration doesn't exist in analysisfeatureprop. If it does, update but not insert
  614. $sql = "SELECT analysisfeatureprop_id FROM {analysisfeatureprop} AFP ".
  615. "INNER JOIN analysisfeature AF ON AF.analysisfeature_id = AFP.analysisfeature_id ".
  616. "WHERE feature_id=%d ".
  617. "AND analysis_id=%d ".
  618. "AND type_id=%d ";
  619. $result = db_query($sql, $feature_id, $analysis_id, $type_id);
  620. $analysisfeatureprop = db_fetch_object($result);
  621. $xml_content = "<Iteration>\n".$featurename_xml."\n".$iteration_tags->asXML()."\n</Iteration>";
  622. // If this Iteration_hits already exists, update it
  623. if ($analysisfeatureprop) {
  624. $sql = "UPDATE {analysisfeatureprop} ".
  625. "SET value = '%s' ".
  626. "WHERE analysisfeatureprop_id = %d ";
  627. db_query($sql, $xml_content, $analysisfeatureprop->analysisfeatureprop_id);
  628. fwrite($log, " (Update)\n"); // write to log
  629. // Otherwise, insert the Iteration_hits into analysisfeature and analysisfeatureprop tables
  630. } else {
  631. //------------------------------------------------------
  632. // Insert into analysisfeature table
  633. //------------------------------------------------------
  634. $sql = "INSERT INTO {analysisfeature} (feature_id, analysis_id) ".
  635. "VALUES (%d, %d)";
  636. db_query ($sql, $feature_id, $analysis_id);
  637. // Get the newly inserted analysisfeature_id
  638. $sql = "SELECT analysisfeature_id FROM {analysisfeature} WHERE feature_id = %d AND analysis_id = %d";
  639. $analysisfeature_id = db_result(db_query($sql, $feature_id, $analysis_id));
  640. //------------------------------------------------------
  641. // Insert into analysisfeatureprop table
  642. //------------------------------------------------------
  643. $sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank)".
  644. "VALUES (%d, %d, '%s', %d)";
  645. db_query($sql, $analysisfeature_id, $type_id, $xml_content, '0');
  646. fwrite($log, " (Insert)\n"); // write to log
  647. }
  648. }
  649. }
  650. }
  651. }
  652. }
  653. }
  654. }
  655. $no_file ++;
  656. }
  657. }
  658. tripal_db_set_active ($previous_db); // Use drupal database
  659. }
  660. print "Done.\nSuccessful and failed entries have been saved in the log file:\n $logfile\n";
  661. fwrite($log, "\n");
  662. fclose($log);
  663. return;
  664. }
  /*******************************************************************************
   * This function is only called by ajax to get regular expressions for blast
   * admin page
   *
   * Reads the parser settings stored in {tripal_analysis_blast} for one
   * database and prints them as JSON through drupal_json().
   *
   * @param $db_id
   *   The db_id whose parser settings are requested.
   */
  function tripal_get_blast_regex ($db_id) {
    $sql = "SELECT * FROM {tripal_analysis_blast} WHERE db_id = %d";
    $blast_regexs = db_fetch_object(db_query($sql, $db_id));

    // db_fetch_object() yields FALSE when no row matches. Answer with the same
    // keys (all NULL) rather than reading properties off of a non-object.
    if (!$blast_regexs) {
      $blast_regexs = new stdClass();
      $blast_regexs->displayname = NULL;
      $blast_regexs->genbank_style = NULL;
      $blast_regexs->regex_hit_id = NULL;
      $blast_regexs->regex_hit_def = NULL;
      $blast_regexs->regex_hit_accession = NULL;
    }

    drupal_json(
      array(
        'name' => $blast_regexs->displayname,
        'genbank_style' => $blast_regexs->genbank_style,
        'reg1' => $blast_regexs->regex_hit_id,
        'reg2' => $blast_regexs->regex_hit_def,
        'reg3' => $blast_regexs->regex_hit_accession,
      )
    );
  }
  /*******************************************************************************
   * Tell drupal which node types this module creates.
   *
   * @return
   *   An array keyed by node type name. It holds a single entry,
   *   'chado_analysis_blast'.
   */
  function tripal_analysis_blast_node_info() {
    // Definition of the blast analysis node type. Title and body fields are
    // both switched off and the type is marked as locked.
    $blast_type = array(
      'name' => t('Analysis: Blast'),
      'module' => 'chado_analysis_blast',
      'description' => t('A blast analysis from the chado database'),
      'has_title' => FALSE,
      'title_label' => t('Analysis: Blast'),
      'has_body' => FALSE,
      'body_label' => t('Blast Analysis Description'),
      'locked' => TRUE
    );

    return array('chado_analysis_blast' => $blast_type);
  }
  /*******************************************************************************
   * Provide a Blast Analysis form
   *
   * Builds the node form for a blast analysis: the general analysis fields
   * (name, program, source, time executed, description) followed by a
   * 'Blast Settings' fieldset (database, xml file, job checkbox, parameters).
   *
   * @param $node
   *   The node being created or edited. Its properties supply the default
   *   values of the form elements.
   *
   * @return
   *   The form array.
   */
  function chado_analysis_blast_form ($node){
    //dprint_r($node);
    $form = array();
    $form['title']= array(
      '#type' => 'hidden',
      '#default_value' => $node->title,
    );
    $form['analysisname']= array(
      '#type' => 'textfield',
      '#title' => t('Analysis Name'),
      '#required' => FALSE,
      '#default_value' => $node->analysisname,
      '#weight' => 1
    );
    $form['program']= array(
      '#type' => 'textfield',
      '#title' => t('Program'),
      '#required' => TRUE,
      '#default_value' => $node->program,
      '#weight' => 2
    );
    $form['programversion']= array(
      '#type' => 'textfield',
      '#title' => t('Program Version'),
      '#required' => TRUE,
      '#default_value' => $node->programversion,
      '#weight' => 3
    );
    $form['algorithm']= array(
      '#type' => 'textfield',
      '#title' => t('Algorithm'),
      '#required' => FALSE,
      '#default_value' => $node->algorithm,
      '#weight' => 4
    );
    $form['sourcename']= array(
      '#type' => 'textfield',
      '#title' => t('Source Name'),
      '#required' => FALSE,
      '#default_value' => $node->sourcename,
      '#weight' => 5
    );
    $form['sourceversion']= array(
      '#type' => 'textfield',
      '#title' => t('Source Version'),
      '#required' => FALSE,
      '#default_value' => $node->sourceversion,
      '#weight' => 6
    );
    $form['sourceuri']= array(
      '#type' => 'textfield',
      '#title' => t('Source URI'),
      '#required' => FALSE,
      '#default_value' => $node->sourceuri,
      '#weight' => 7
    );

    // Work out the year/month/day defaults for the date element.
    $default_time = $node->timeexecuted;
    if (is_array($default_time)) {
      // The value is already in the year/month/day array form produced by the
      // date element (as seen when a submitted node is previewed).
      $year = $default_time['year'];
      $month = $default_time['month'];
      $day = $default_time['day'];
    } else if ($default_time) {
      // Get time saved in chado ("YYYY-MM-DD hh:mm:ss"); leading zeros of the
      // month and day are dropped.
      $year = preg_replace("/^(\d+)-\d+-\d+ .*/", "$1", $default_time);
      $month = preg_replace("/^\d+-0?(\d+)-\d+ .*/", "$1", $default_time);
      $day = preg_replace("/^\d+-\d+-0?(\d+) .*/", "$1", $default_time);
    } else {
      // If the time is not set, use current time
      $default_time = time();
      $year = format_date($default_time, 'custom', 'Y');
      $month = format_date($default_time, 'custom', 'n');
      $day = format_date($default_time, 'custom', 'j');
    }
    $form['timeexecuted']= array(
      '#type' => 'date',
      '#title' => t('Time Executed'),
      '#required' => TRUE,
      '#default_value' => array(
        'year' => $year,
        'month' => $month,
        'day' => $day,
      ),
      '#weight' => 8
    );
    $form['description']= array(
      '#type' => 'textarea',
      '#rows' => 15,
      '#title' => t('Description and/or Program Settings'),
      '#required' => FALSE,
      // The raw value is passed on purpose: the textarea is escaped when it is
      // rendered, so running check_plain() here as well would double-encode
      // characters such as & and < every time the node is edited.
      '#default_value' => $node->description,
      '#weight' => 9
    );

    // Blast specific settings. If blastdb carries the whole bar-separated
    // "db|file|parameters" string, split it into its three parts.
    if (preg_match("/.*\|.*\|.*/",$node->blastdb)) {
      $prop_values = explode("|", $node->blastdb);
      $node->blastdb = $prop_values[0];
      $node->blastfile = $prop_values[1];
      $node->blastparameters = $prop_values[2];
    }
    $form['blast'] = array(
      '#title' => t('Blast Settings'),
      '#type' => 'fieldset',
      '#description' => t('Specific Settings for Blast Analysis.'),
      '#collapsible' => TRUE,
      '#attributes' => array('id' => 'blast-extra-settings'),
      '#weight' => 11
    );

    // get a list of db from chado for user to choose
    $previous_db = tripal_db_set_active('chado'); // use chado database
    $sql = 'SELECT db_id, name FROM {db} ORDER BY lower(name)';
    $results = db_query ($sql);
    tripal_db_set_active($previous_db);
    $blastdbs = array();
    while ($db = db_fetch_object($results)){
      $blastdbs[$db->db_id] = $db->name;
    }
    $form['db_options'] = array(
      '#type' => 'value',
      '#value' => $blastdbs
    );
    $form['blast']['blastdb'] = array(
      '#title' => t('Database'),
      '#type' => 'select',
      '#description' => t('The database used for the blast analysis.'),
      '#options' => $form['db_options']['#value'],
      '#default_value' => $node->blastdb,
    );
    $form['blast']['blastfile'] = array(
      '#title' => t('Blast xml File: (if you input a directory without the tailing slash, all xml files in the directory will be loaded)'),
      '#type' => 'textfield',
      '#description' => t('The xml output file generated by blast in full path.'),
      '#default_value' => $node->blastfile,
    );
    $form['blast']['blastjob'] = array(
      '#type' => 'checkbox',
      '#title' => t('Submit a job to parse the xml output into analysisfeatureprop table'),
      '#description' => t('Note: features associated with the blast results must '.
                          'exist in chado before parsing the file. Otherwise, blast '.
                          'results that cannot be linked to a feature will be '.
                          'discarded. Also, Triapl Blast module needs to be enabled.'),
      '#default_value' => $node->blastjob
    );
    $form['blast']['blastparameters'] = array(
      '#title' => t('Parameters'),
      '#type' => 'textfield',
      '#description' => t('The parameters for running the blast analysis.'),
      '#default_value' => $node->blastparameters,
    );
    return $form;
  }
  /*******************************************************************************
   * Validate the blast analysis node form.
   *
   * This validation is being used for three activities:
   *   CASE A: Update a node that exists in both drupal and chado
   *   CASE B: Synchronizing a node from chado to drupal
   *   CASE C: Inserting a new node that exists in neither drupal nor chado
   *
   * Only CASE C is checked at the moment: a form error is set on 'sourcename'
   * when an analysis with the same program, programversion and sourcename
   * already exists in chado.
   *
   * @param $node
   *   The node being validated.
   * @param $form
   *   The node form (not used by this function).
   */
  function chado_analysis_blast_validate($node, &$form){
    ##dprint_r($node);

    // CASE A: only nodes being updated will have an nid already.
    // TO DO: check that the new fields don't yield a non-unique primary key in chado
    if ($node->nid) {
      return;
    }

    // CASE B: no nid but an analysis_id means the node is being synchronized
    // from chado to drupal. Nothing is validated for it.
    if ($node->analysis_id) {
      return;
    }

    //---------------------------------------------------
    // CASE C: We are validating a form for inserting a new node
    //---------------------------------------------------
    // The primary key for the chado analysis table is
    // program, programversion, sourcename
    // Check to see if this analysis really is new -ie, it doesn't have the same
    // primary key as any other analysis
    $sql = "SELECT analysis_id ".
           "FROM {analysis} ".
           "WHERE program = '%s' ".
           "AND programversion = '%s' ".
           "AND sourcename = '%s'";
    $previous_db = tripal_db_set_active('chado');
    $analysis_id = db_result(db_query($sql, $node->program, $node->programversion, $node->sourcename));
    tripal_db_set_active($previous_db);
    if (!$analysis_id) {
      // No clash: the analysis is new.
      return;
    }

    // This primary key already exists in chado analysis table!
    // Check to see if it has also been synced with drupal.
    $sql = "SELECT nid FROM {chado_analysis} ".
           "WHERE analysis_id = %d";
    $node_id = db_result(db_query($sql, $analysis_id));

    // The variable parts are handed to t() as placeholders so that the
    // translatable string stays constant.
    if ($node_id) {
      // the analysis has already been synced with drupal, redirect the user
      // to modify that node or start over
      $error = t('This analysis already exists in the chado database (analysis id '.
                 '@analysis_id) and has been synchronized '.
                 'with drupal. See node @nid if you wish to update that analysis. '.
                 ' For a new analysis, please select a unique primary key '.
                 '(primary key consists of sourcename, program and programversion).',
                 array('@analysis_id' => $analysis_id, '@nid' => $node_id));
    } else {
      // the analysis does not exist in drupal - tell the user
      // to sync from chado or create a new unique primary key
      $error = t('This analysis already exists in the chado database (analysis id '.
                 '@analysis_id) but has not been synchronized '.
                 'with drupal. See the tripal admin pages to synchronize. '.
                 ' For a new analysis, please select a unique primary key '.
                 '(primary key consists of sourcename, program and programversion).',
                 array('@analysis_id' => $analysis_id));
    }
    form_set_error('sourcename', $error);
  }
  /*******************************************************************************
   * Insert a new blast analysis.
   *
   * Adds the analysis to the chado {analysis} table, stores the blast settings
   * ("blastdb|blastfile|blastparameters") in {analysisprop}, optionally
   * schedules a job to parse the blast xml output, links the node to the
   * analysis in {chado_analysis} and writes the node title.
   *
   * @param $node
   *   The node being inserted. timeexecuted is expected to be the
   *   year/month/day array submitted by the date form element.
   */
  function chado_analysis_blast_insert($node){
    global $user;

    // Create a timestamp so we can insert it into the chado database. ISO
    // year-month-day order is used so that the value cannot be read as
    // day/month by a database whose date style differs from month/day/year.
    $time = $node->timeexecuted;
    $timestamp = sprintf('%04d-%02d-%02d', $time['year'], $time['month'], $time['day']);

    //---------------------------------------------------
    // First add the item to the chado analysis table
    //---------------------------------------------------
    $sql = "INSERT INTO {analysis} ".
           "  (name, description, program, programversion, algorithm, ".
           "   sourcename, sourceversion, sourceuri, timeexecuted) ".
           "VALUES ('%s','%s','%s','%s','%s','%s','%s','%s','%s')";
    $previous_db = tripal_db_set_active('chado'); // use chado database
    db_query($sql,$node->analysisname, $node->description,
             $node->program,$node->programversion,$node->algorithm,
             $node->sourcename, $node->sourceversion, $node->sourceuri,
             $timestamp);

    // find the newly entered analysis_id
    $sql = "SELECT analysis_id ".
           "FROM {analysis} ".
           "WHERE program = '%s' ".
           "AND programversion = '%s' ".
           "AND sourcename = '%s'";
    $analysis_id = db_result(db_query($sql, $node->program,
                             $node->programversion, $node->sourcename));

    // Get cvterm_id for 'analysis_blast_settings'
    $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
           "INNER JOIN cv CV ON CV.cv_id = CVT.cv_id ".
           "WHERE CVT.name = 'analysis_blast_settings' ".
           "AND CV.name = 'tripal'";
    $type_id = db_result(db_query($sql));

    //---------------------------------------------------
    // Insert into chado {analysisprop} table
    //---------------------------------------------------
    $sql = "INSERT INTO {analysisprop} (analysis_id, type_id, value) ".
           "VALUES (%d, %d, '%s')";
    $blastsettings = $node->blastdb."|".$node->blastfile."|".$node->blastparameters;
    db_query($sql, $analysis_id, $type_id, $blastsettings);
    tripal_db_set_active($previous_db); // switch back to drupal database

    //---------------------------------------------------
    // Add a job if the user wants to parse the xml output
    //---------------------------------------------------
    if($node->blastjob) {
      $job_args = array($analysis_id, $node->blastdb, $node->blastfile);
      if (is_readable($node->blastfile)) {
        // Only the last path component is shown in the job name.
        $fname = preg_replace("/.*\/(.*)/", "$1", $node->blastfile);
        tripal_add_job("Parse blast: $fname",'tripal_analysis_blast',
                       'tripal_analysis_blast_parseXMLFile', $job_args, $user->uid);
      } else {
        drupal_set_message("Can not open blast output file. Job not scheduled.");
      }
    }

    // next add the item to the drupal table
    $sql = "INSERT INTO {chado_analysis} (nid, vid, analysis_id) ".
           "VALUES (%d, %d, %d)";
    db_query($sql,$node->nid,$node->vid,$analysis_id);

    // Create a title for the analysis node so when the node is saved, it will
    // have a title. If the analysis has a name, use it as the node title. If
    // not, construct the title as "program (version)".
    $record = new stdClass();
    if ($node->analysisname) {
      $record->title = $node->analysisname;
    } else {
      $record->title = "$node->program ($node->programversion)";
    }
    $record->nid = $node->nid;
    drupal_write_record('node',$record,'nid');
    drupal_write_record('node_revisions',$record,'nid');
  }
  /*******************************************************************************
   * Delete blast analysis
   *
   * Removes the node's rows from the drupal tables {chado_analysis}, {node} and
   * {node_revisions}, then removes the linked analysis together with its
   * analysisfeatureprop, analysisfeature and analysisprop rows from chado.
   *
   * @param $node
   *   The node being deleted; nid and vid identify the rows to remove.
   */
  function chado_analysis_blast_delete($node){
    // Before removing, get analysis_id so we can remove it from chado database
    // later
    $sql_drupal = "SELECT analysis_id ".
                  "FROM {chado_analysis} ".
                  "WHERE nid = %d ".
                  "AND vid = %d";
    $analysis_id = db_result(db_query($sql_drupal, $node->nid, $node->vid));

    // Remove data from the {chado_analysis}, {node}, and {node_revisions} tables
    $sql_del = "DELETE FROM {chado_analysis} ".
               "WHERE nid = %d ".
               "AND vid = %d";
    db_query($sql_del, $node->nid, $node->vid);
    $sql_del = "DELETE FROM {node} ".
               "WHERE nid = %d ".
               "AND vid = %d";
    db_query($sql_del, $node->nid, $node->vid);
    $sql_del = "DELETE FROM {node_revisions} ".
               "WHERE nid = %d ".
               "AND vid = %d";
    db_query($sql_del, $node->nid, $node->vid);

    // Without a linked analysis there is nothing to remove from chado.
    if (!$analysis_id) {
      return;
    }

    //Remove from analysisfeatureprop, analysisfeature, analysis, and analysisprop tables
    $previous_db = tripal_db_set_active('chado');
    // One statement removes the properties of every analysisfeature of this
    // analysis, instead of issuing one DELETE per analysisfeature row.
    db_query("DELETE FROM {analysisfeatureprop} ".
             "WHERE analysisfeature_id IN ".
             "  (SELECT analysisfeature_id FROM {analysisfeature} WHERE analysis_id = %d)",
             $analysis_id);
    db_query("DELETE FROM {analysisfeature} WHERE analysis_id = %d", $analysis_id);
    db_query("DELETE FROM {analysisprop} WHERE analysis_id = %d", $analysis_id);
    db_query("DELETE FROM {analysis} WHERE analysis_id = %d", $analysis_id);
    tripal_db_set_active($previous_db);
  }
  /*******************************************************************************
   * Update blast analysis
   *
   * Writes the submitted values to the chado {analysis} row linked to this
   * node, rewrites the blast settings ("blastdb|blastfile|blastparameters") in
   * {analysisprop}, optionally schedules a job to parse the blast xml output
   * and refreshes the node title. Nothing is done when a new revision is being
   * created.
   *
   * @param $node
   *   The node being updated. timeexecuted is expected to be the
   *   year/month/day array submitted by the date form element.
   */
  function chado_analysis_blast_update($node){
    global $user;
    if($node->revision){
      // TODO -- decide what to do about revisions
    } else {
      // Create a timestamp so we can insert it into the chado database. ISO
      // year-month-day order is used so that the value cannot be read as
      // day/month by a database whose date style differs from month/day/year.
      $time = $node->timeexecuted;
      $timestamp = sprintf('%04d-%02d-%02d', $time['year'], $time['month'], $time['day']);

      // get the analysis_id for this node. db_result() is used so that a
      // missing row gives FALSE instead of a property read on a non-object.
      $sql = "SELECT analysis_id ".
             "FROM {chado_analysis} ".
             "WHERE vid = %d";
      $analysis_id = db_result(db_query($sql, $node->vid));

      $sql = "UPDATE {analysis} ".
             "SET name = '%s', ".
             "    description = '%s', ".
             "    program = '%s', ".
             "    programversion = '%s', ".
             "    algorithm = '%s', ".
             "    sourcename = '%s', ".
             "    sourceversion = '%s', ".
             "    sourceuri = '%s', ".
             "    timeexecuted = '%s' ".
             "WHERE analysis_id = %d ";
      $previous_db = tripal_db_set_active('chado'); // use chado database
      db_query($sql, $node->analysisname, $node->description, $node->program,
               $node->programversion,$node->algorithm,$node->sourcename,
               $node->sourceversion, $node->sourceuri, $timestamp, $analysis_id);

      // Get cvterm_id for 'analysis_blast_settings'
      $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
             "INNER JOIN cv CV ON CV.cv_id = CVT.cv_id ".
             "WHERE CVT.name = 'analysis_blast_settings' ".
             "AND CV.name = 'tripal'";
      $type_id = db_result(db_query($sql));

      // NOTE(review): this only changes an existing analysisprop row; if the
      // analysis has no blast settings row yet, nothing is stored -- confirm
      // whether an insert is needed in that case.
      $sql = "UPDATE {analysisprop} ".
             "SET value = '%s' ".
             "WHERE analysis_id = %d AND type_id = %d";
      $blastsettings = $node->blastdb."|".$node->blastfile."|".$node->blastparameters;
      db_query($sql, $blastsettings, $analysis_id, $type_id);
      tripal_db_set_active($previous_db); // switch back to drupal database

      // Add a job if the user wants to parse the xml output
      if($node->blastjob) {
        $job_args = array($analysis_id, $node->blastdb, $node->blastfile);
        if (is_readable($node->blastfile)) {
          // Only the last path component is shown in the job name.
          $fname = preg_replace("/.*\/(.*)/", "$1", $node->blastfile);
          tripal_add_job("Parse blast: $fname",'tripal_analysis_blast',
                         'tripal_analysis_blast_parseXMLFile', $job_args, $user->uid);
        } else {
          drupal_set_message("Can not open blast output file. Job not scheduled.");
        }
      }

      // Create a title for the analysis node so when the node is saved, it
      // will have a title. If the analysis has a name, use it as the node
      // title. If not, construct the title as "program (version)".
      $record = new stdClass();
      if ($node->analysisname) {
        $record->title = $node->analysisname;
      } else {
        $record->title = "$node->program ($node->programversion)";
      }
      $record->nid = $node->nid;
      drupal_write_record('node',$record,'nid');
      drupal_write_record('node_revisions',$record,'nid');
    }
  }
  /*******************************************************************************
   * When a node is requested by the user this function is called to allow us
   * to add auxiliary data to the node object.
   *
   * @param $node
   *   The node being loaded; its vid is used to find the linked analysis.
   *
   * @return
   *   An object holding the analysis columns from chado (the name is exposed
   *   as analysisname), featurecount, the blast settings split into blastdb,
   *   blastfile and blastparameters, and the node title. When the node is not
   *   linked to an analysis only the title is set.
   */
  function chado_analysis_blast_load($node){
    // get the analysis_id for this node:
    $sql = "SELECT analysis_id FROM {chado_analysis} WHERE vid = %d";
    $ana_node = db_fetch_object(db_query($sql, $node->vid));
    $additions = new stdClass();
    if ($ana_node) {
      $previous_db = tripal_db_set_active('chado'); // use chado database

      // get analysis information
      $sql = "SELECT analysis_id, name AS analysisname, description, program, ".
             "       programversion, algorithm, sourcename, sourceversion, ".
             "       sourceuri, timeexecuted ".
             "FROM {analysis} ".
             "WHERE analysis_id = %d";
      $analysis = db_fetch_object(db_query($sql, $ana_node->analysis_id));
      // Keep the empty object when chado has no such analysis, so that the
      // assignments below never operate on FALSE.
      if ($analysis) {
        $additions = $analysis;
      }

      // get number of features assc with this analysis
      $sql = "SELECT count(feature_id) as featurecount ".
             "FROM {analysisfeature} ".
             "WHERE analysis_id = %d";
      $additions->featurecount = db_result(db_query($sql, $ana_node->analysis_id));

      // get cvterm_id for 'analysis_blast_settings'
      $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
             "INNER JOIN cv CV ON CV.cv_id = CVT.cv_id ".
             "WHERE CVT.name = 'analysis_blast_settings' ".
             "AND CV.name = 'tripal'";
      $type_id = db_result(db_query($sql));

      // get analysisprop information
      $sql = "SELECT value FROM {analysisprop} ".
             "WHERE analysis_id = %d ".
             "AND type_id = %d";
      $analysisprop = db_result(db_query($sql, $ana_node->analysis_id, $type_id));

      // The value is stored as "blastdb|blastfile|blastparameters". A limit of
      // 3 splits it into exactly those parts and keeps any further bar inside
      // the parameters (a limit of 1 would return the string unsplit). The
      // array union supplies empty strings for parts that are missing.
      $prop_values = explode("|", $analysisprop, 3) + array('', '', '');
      $additions->blastdb = $prop_values[0];
      $additions->blastfile = $prop_values[1];
      $additions->blastparameters = $prop_values[2];

      tripal_db_set_active($previous_db); // now use drupal database
    }

    // If the analysis has a name, use it as the node title. If not, construct
    // the title as "program (version)".
    if (!empty($additions->analysisname)) {
      $additions->title = $additions->analysisname;
    } else {
      $program = isset($additions->program) ? $additions->program : '';
      $programversion = isset($additions->programversion) ? $additions->programversion : '';
      $additions->title = "$program ($programversion)";
    }
    return $additions;
  }
  /*******************************************************************************
   * Customize the view of the chado_analysis node before it is rendered.
   *
   * @param $node
   *   The node to display.
   * @param $teaser
   *   TRUE when the teaser is being built; the node is then returned untouched.
   * @param $page
   *   Not used by this function.
   *
   * @return
   *   The node, prepared for display when the full view is requested.
   */
  function chado_analysis_blast_view ($node, $teaser = FALSE, $page = FALSE) {
    //dprint_r($node);
    // Teasers get no special treatment.
    if ($teaser) {
      return $node;
    }

    // use drupal's default node view:
    $node = node_prepare($node, $teaser);

    // While a submitted form is previewed the date is still the
    // month/day/year array of the form element and would be shown as 'Array'.
    // Turn it into a year-month-day string.
    $executed = $node->timeexecuted;
    if (is_array($executed)) {
      $month = $executed['month'];
      $day = $executed['day'];
      $year = $executed['year'];
      $node->timeexecuted = $year.'-'.$month.'-'.$day;
    }

    // The analysisprop value keeps its parts separated with a bar |. When
    // blastdb holds that whole string, spread the parts over the node.
    $holds_all_settings = preg_match("/.*\|.*\|.*/",$node->blastdb);
    if ($holds_all_settings) {
      $parts = explode("|", $node->blastdb);
      $node->blastdb = $parts[0];
      $node->blastfile = $parts[1];
      $node->blastparameters = $parts[2];
    }

    return $node;
  }
  /*******************************************************************************
   * Set the permission types that the chado module uses. Essentially we
   * want permissions that protect viewing, creation, editing and deleting of
   * chado data objects.
   *
   * @return
   *   The list of permission names defined by this module.
   */
  function tripal_analysis_blast_perm(){
    $permissions = array();
    $permissions[] = 'access chado_analysis_blast content';
    $permissions[] = 'create chado_analysis_blast content';
    $permissions[] = 'delete chado_analysis_blast content';
    $permissions[] = 'edit chado_analysis_blast content';
    return $permissions;
  }
  /*******************************************************************************
   * The following function provides access control for users trying to
   * perform actions on data managed by this module
   *
   * @param $op
   *   The operation: 'create', 'update', 'delete' or 'view'.
   * @param $node
   *   The node the operation applies to (not used by this function).
   * @param $account
   *   The user account whose permissions are checked.
   *
   * @return
   *   For 'create' the result of user_access(); for 'update', 'delete' and
   *   'view' TRUE when the account holds the matching permission. FALSE in
   *   every other case.
   */
  function chado_analysis_blast_access($op, $node, $account){
    switch ($op) {
      case 'create':
        return user_access('create chado_analysis_blast content', $account);

      case 'update':
        return user_access('edit chado_analysis_blast content', $account) ? TRUE : FALSE;

      case 'delete':
        return user_access('delete chado_analysis_blast content', $account) ? TRUE : FALSE;

      case 'view':
        return user_access('access chado_analysis_blast content', $account) ? TRUE : FALSE;
    }

    // Any other operation is refused.
    return FALSE;
  }
  1227. /********************************************************************************
  1228. *
  1229. */
  1230. function tripal_analysis_blast_get_result_object($xml_string,$db,$max,$feature_id, $analysis) {
  1231. $blast_object = new stdClass();
  1232. // Get the parser using db_id
  1233. $sql = "SELECT * FROM {tripal_analysis_blast} WHERE db_id = %d";
  1234. $parser = db_fetch_object(db_query($sql, $db->db_id));
  1235. $db_name = $parser->displayname;
  1236. $is_genbank = $parser->genbank_style;
  1237. $regex_hit_id = $parser->regex_hit_id;
  1238. $regex_hit_def = $parser->regex_hit_def;
  1239. $regex_hit_accession = $parser->regex_hit_accession;
  1240. // set default if regular expressions have not been specified
  1241. if(!$regex_hit_id){
  1242. $regex_hit_id = '/^(.*?)\s.*$/';
  1243. } else {
  1244. $regex_hit_id = '/'.$regex_hit_id.'/';
  1245. }
  1246. if(!$regex_hit_def){
  1247. $regex_hit_def = '/^.*?\s(.*)$/';
  1248. } else {
  1249. $regex_hit_def = '/'.$regex_hit_def.'/';
  1250. }
  1251. if(!$regex_hit_accession){
  1252. $regex_hit_accession = '/^(.*?)\s.*$/';
  1253. } else {
  1254. $regex_hit_accession = '/'.$regex_hit_accession.'/';
  1255. }
  1256. // Get analysis information
  1257. $blast_object->analysis = $analysis;
  1258. $blast_object->db = $db;
  1259. if (!$db_name) {
  1260. $blast_object->title = $analysis->name;
  1261. } else {
  1262. $blast_object->title = $db_name;
  1263. }
  1264. // Find node id for the analysis
  1265. $ana_nid = db_result(db_query("SELECT nid FROM {chado_analysis} WHERE analysis_id = %d", $analysis->aid));
  1266. $blast_object->ana_nid = $ana_nid;
  1267. $blast_object->ana_time = $analysis->time;
  1268. $blast_object->ana_name = $analysis->name;
  1269. // Load the file. This XML file should be an extract
  1270. // of the original XML file with only a single iteration.
  1271. // An iteration is essentially all the hits for a single
  1272. // query sequence.
  1273. $xml_output = simplexml_load_string($xml_string);
  1274. $iteration = '';
  1275. // new XML file parser has added the feature name within <Iteration_query-def> tags.
  1276. if ($xml_output->getName() == 'Iteration') {
  1277. foreach ($xml_output->children() as $xml_tag) {
  1278. if ($xml_tag->getName() == 'Iteration_query-def') {
  1279. // Here we show the feature name again to check if we pull the correct data
  1280. $blast_object->xml_tag = $xml_tag;
  1281. } else if ($xml_tag->getName() == 'Iteration_hits') {
  1282. $iteration = $xml_tag;
  1283. }
  1284. }
  1285. // This is for the file parsed by the old parser
  1286. } else {
  1287. $iteration = $xml_output;
  1288. }
  1289. $number_hits = 0;
  1290. foreach($iteration->children() as $hits){
  1291. $number_hits ++;
  1292. }
  1293. // add the links for updating blast info using Ajax
  1294. $blast_object->max = $max;
  1295. $blast_object->number_hits = $number_hits;
  1296. $blast_object->feature_id = $feature_id;
  1297. $hits_array = array();
  1298. $hit_count = 0;
  1299. foreach($iteration->children() as $hits){
  1300. $hsp_array = array();
  1301. $counter = 0;
  1302. foreach($hits->children() as $hit){
  1303. $best_evalue = 0;
  1304. $best_identity = 0;
  1305. $best_len = 0;
  1306. $element_name = $hit->getName();
  1307. if($element_name == 'Hit_id'){
  1308. // if parsing "name, acc, desc" from three tags (1/3)
  1309. if ($is_genbank) {
  1310. $hit_name = $hit;
  1311. }
  1312. } else if($element_name == 'Hit_def'){
  1313. if($is_genbank){
  1314. $description = $hit;
  1315. } else {
  1316. $accession = preg_replace($regex_hit_accession,"$1",$hit);
  1317. $hit_name = preg_replace($regex_hit_id,"$1",$hit);
  1318. $description = preg_replace($regex_hit_def,"$1",$hit);
  1319. }
  1320. } else if($element_name == 'Hit_accession'){
  1321. // if parsing "name, acc, desc" from three tags (3/3)
  1322. if ($is_genbank){
  1323. $accession = $hit;
  1324. }
  1325. // now run through each HSP for this hit
  1326. } else if($element_name == 'Hit_hsps'){
  1327. foreach($hit->children() as $hsp){
  1328. foreach($hsp->children() as $hsp_info){
  1329. $element_name = $hsp_info->getName();
  1330. if($element_name == 'Hsp_num'){
  1331. $hsp_num = $hsp_info;
  1332. }
  1333. if($element_name == 'Hsp_bit-score'){
  1334. $hsp_bit_score = $hsp_info;
  1335. }
  1336. if($element_name == 'Hsp_score'){
  1337. $hsp_score = $hsp_info;
  1338. }
  1339. if($element_name == 'Hsp_evalue'){
  1340. $hsp_evalue = $hsp_info;
  1341. // use the first evalue for this set of HSPs
  1342. // as the best evalue. This gets shown as
  1343. // info for the overall match.
  1344. if(!$best_evalue){
  1345. $best_evalue = $hsp_evalue;
  1346. }
  1347. }
  1348. if($element_name == 'Hsp_query-from'){
  1349. $hsp_query_from = $hsp_info;
  1350. }
  1351. if($element_name == 'Hsp_query-to'){
  1352. $hsp_query_to = $hsp_info;
  1353. }
  1354. if($element_name == 'Hsp_hit-from'){
  1355. $hsp_hit_from = $hsp_info;
  1356. }
  1357. if($element_name == 'Hsp_hit-to'){
  1358. $hsp_hit_to = $hsp_info;
  1359. }
  1360. if($element_name == 'Hsp_query-frame'){
  1361. $hsp_query_frame = $hsp_info;
  1362. }
  1363. if($element_name == 'Hsp_identity'){
  1364. $hsp_identity = $hsp_info;
  1365. // use the first identity count for this set of HSPs
  1366. // as the best identity. It is used to compute the
  1367. // percent identity shown for the overall match.
  1368. if(!$best_identity){
  1369. $best_identity = $hsp_identity;
  1370. }
  1371. }
  1372. if($element_name == 'Hsp_positive'){
  1373. $hsp_positive = $hsp_info;
  1374. }
  1375. if($element_name == 'Hsp_align-len'){
  1376. $hsp_align_len = $hsp_info;
  1377. // use the first alignment length for this set of HSPs
  1378. // as the best length. It is used to compute the
  1379. // percent identity shown for the overall match.
  1380. if(!$best_len){
  1381. $best_len = $hsp_align_len;
  1382. }
  1383. }
  1384. if($element_name == 'Hsp_qseq'){
  1385. $hsp_qseq = $hsp_info;
  1386. }
  1387. if($element_name == 'Hsp_hseq'){
  1388. $hsp_hseq = $hsp_info;
  1389. }
  1390. if($element_name == 'Hsp_midline'){
  1391. $hsp_midline = $hsp_info;
  1392. }
  1393. }
  1394. $hsp_content = array();
  1395. $hsp_content ['hsp_num'] = $hsp_num;
  1396. $hsp_content ['bit_score'] = $hsp_bit_score;
  1397. $hsp_content ['score'] = $hsp_score;
  1398. $hsp_content ['evalue'] = $hsp_evalue;
  1399. $hsp_content ['query_frame'] = $hsp_query_frame;
  1400. $hsp_content ['qseq'] = $hsp_qseq;
  1401. $hsp_content ['midline'] = $hsp_midline;
  1402. $hsp_content ['hseq'] = $hsp_hseq;
  1403. $hsp_content ['hit_from'] = $hsp_hit_from;
  1404. $hsp_content ['hit_to'] = $hsp_hit_to;
  1405. $hsp_content['identity'] = $hsp_identity;
  1406. $hsp_content['align_len'] = $hsp_align_len;
  1407. $hsp_content['positive'] = $hsp_positive;
  1408. $hsp_content['query_from'] = $hsp_query_from;
  1409. $hsp_content['query_to'] = $hsp_query_to;
  1410. $hsp_array[$counter] = $hsp_content;
  1411. $counter ++;
  1412. }
  1413. }
  1414. }
  1415. $arrowr_url = url(drupal_get_path('theme', 'tripal')."/images/arrow_r.png");
  1416. $hits_array[$hit_count]['arrowr_url'] = $arrowr_url;
  1417. $hits_array[$hit_count]['accession'] = $accession;
  1418. $hits_array[$hit_count]['hit_name'] = $hit_name;
  1419. if($accession && $db->urlprefix){
  1420. $hits_array[$hit_count]['hit_url'] = "$db->urlprefix$accession";
  1421. } else {
  1422. // Test if this is another feature in the database
  1423. $sql = "SELECT feature_id FROM {feature} WHERE uniquename = '%s'";
  1424. $previous_db = db_set_active('chado');
  1425. $hit_feature_id = db_result(db_query($sql, $hit_name));
  1426. db_set_active($previous_db);
  1427. // If it is, add link to that feature
  1428. if ($hit_feature_id) {
  1429. $hits_array[$hit_count]['hit_url'] = "ID$hit_feature_id";
  1430. }
  1431. }
  1432. $hits_array[$hit_count]['best_evalue'] = $best_evalue;
  1433. $percent_identity = number_format($best_identity/$best_len*100, 2);
  1434. $hits_array[$hit_count]['percent_identity'] = $percent_identity;
  1435. $hits_array[$hit_count]['description'] = $description;
  1436. $hits_array[$hit_count]['hsp'] = $hsp_array;
  1437. $hit_count ++;
  1438. // if we've hit the maximum number of hits then return
  1439. if($max > 0 && $hit_count >= $max){
  1440. break;
  1441. }
  1442. }
  1443. $blast_object->hits_array = $hits_array;
  1444. return $blast_object;
  1445. }