tripal_gbrowse.align_features.inc 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647
  1. <?php
  /**
   * Form builder for queueing a feature-alignment tripal job.
   *
   * Lets the user pick a query grouping and a database grouping (each a chado
   * library or analysis), an alignment program (BLAST or BLAT) with its options,
   * and the GBrowse instance the resulting alignments are loaded into.
   *
   * Select option keys are the library_id/analysis_id prefixed with 'L' or 'A';
   * tripal_gbrowse_align_features_form_submit() decodes that prefix.
   *
   * @param $form_state
   *   The current form state. Previously submitted 'query' and 'database'
   *   values, when present, are used as the defaults of the two selects.
   *
   * @return
   *   The form array.
   */
  function tripal_gbrowse_align_features_form ($form_state) {
    $form = array();

    // Compile all analyses as options.
    $analysis_options = array();
    $results = tripal_core_chado_select('analysis', array('analysis_id', 'name', 'program'), array());
    foreach ($results as $r) {
      $analysis_options[ 'A'.$r->analysis_id ] = $r->name .' --'.$r->program;
    }

    // Compile all libraries as options.
    $library_options = array();
    $results = tripal_core_chado_select('library', array('library_id', 'name', 'uniquename'), array());
    foreach ($results as $r) {
      $library_options[ 'L'.$r->library_id ] = $r->name .' --'.$r->uniquename;
    }

    // Both selects offer the same grouped options.
    $grouping_options = array(
      'Libraries' => $library_options,
      'Analyses' => $analysis_options,
    );

    // On the first build there are no submitted values yet, so guard the lookups
    // instead of reading undefined indexes.
    $default_query = isset($form_state['values']['query']) ? $form_state['values']['query'] : NULL;
    $default_database = isset($form_state['values']['database']) ? $form_state['values']['database'] : NULL;

    $form['description'] = array(
      '#type' => 'item',
      '#value' => 'This form allows you to select a query library/analysis and align the '
        .'features that are part of that query library/analysis against those from a database '
        .'library/analysis using BLAST or BLAT. Depending upon the Alignment Criteria, the top '
        .'result(s) for each query feature will be used to determine the location of the query '
        .'feature on a database feature.',
    );

    $form['q'] = array(
      '#type' => 'fieldset',
      '#title' => 'Features to be Aligned (Query)',
      '#description' => 'Please select the library or analysis below which groups the features you '
        .'want aligned together.',
    );
    $form['q']['query'] = array(
      '#type' => 'select',
      '#title' => 'Query Features',
      '#options' => $grouping_options,
      '#default_value' => $default_query,
    );

    $form['d'] = array(
      '#type' => 'fieldset',
      '#title' => 'Features to Align To (Database)',
      '#description' => 'Please select the library or analysis below, which contains the features '
        .'you want to align the query features selected above to. Only features in the selected '
        .'library/analysis with sequence data will be used.',
    );
    $form['d']['database'] = array(
      '#type' => 'select',
      '#title' => 'Database Features',
      '#options' => $grouping_options,
      '#default_value' => $default_database,
    );

    $form['b'] = array(
      '#type' => 'fieldset',
      '#title' => 'Alignment Criteria',
    );
    $form['b']['program'] = array(
      '#type' => 'radios',
      '#title' => 'Alignment Program',
      '#options' => array(
        'blast' => 'BLAST',
        'blat' => 'BLAT',
      ),
      '#default_value' => 'blat',
    );

    $form['b']['blat'] = array(
      '#type' => 'fieldset',
      '#title' => 'BLAT Options',
      '#collapsible' => TRUE,
      '#collapsed' => TRUE,
    );
    $form['b']['blat']['min_identity'] = array(
      '#type' => 'textfield',
      '#title' => 'Minimum Identity',
      '#default_value' => '90',
    );

    $form['b']['blast'] = array(
      '#type' => 'fieldset',
      '#title' => 'BLAST Options',
      '#collapsible' => TRUE,
      '#collapsed' => TRUE,
    );
    $form['b']['blast']['gapped_alignment'] = array(
      '#type' => 'checkbox',
      '#title' => 'Gapped Alignment?',
      '#default_value' => TRUE,
    );
    $form['b']['blast']['evalue'] = array(
      '#type' => 'textfield',
      '#title' => 'Expectation Values (E-value) Cutoff',
      '#description' => 'To enter in scientific notation (ie: 5 x 10-6), enter [base number]e[power] (ie: 5e-6)',
      '#default_value' => 10.0
    );

    $form['g'] = array(
      '#type' => 'fieldset',
      '#title' => 'GBrowse Information',
    );
    $form['g']['source'] = array(
      '#type' => 'textfield',
      '#title' => 'Source of Features',
      '#description' => 'The source of the features grouped by the selected query library'
    );

    // List the registered GBrowse instances, keyed by gbrowse_id.
    $resource = db_query('SELECT * FROM {tripal_gbrowse_instances}');
    $items = array();
    while ($record = db_fetch_object($resource)) {
      $items[$record->gbrowse_id] = $record->gbrowse_name;
    }
    $form['g']['gbrowse_id'] = array(
      '#type' => 'select',
      '#title' => t('GBrowse Instances'),
      '#options' => $items,
      '#description' => t('Selected GBrowse Instances to load the query features into.'),
    );

    $form['submit'] = array(
      '#type' => 'submit',
      '#name' => 'align',
      '#value' => 'Align Features',
    );

    return $form;
  }
  /**
   * Form submit handler: queues the alignment as a tripal job.
   *
   * Decodes the 'L<id>'/'A<id>' query and database selections, looks up the
   * connection details of the chosen GBrowse instance and registers either
   * tripal_gbrowse_align_features_by_blat or
   * tripal_gbrowse_align_features_by_blast with tripal_add_job(), passing the
   * collected options as a single serialized argument.
   *
   * Nothing is queued (and an error message is shown) when a selection cannot
   * be decoded or the GBrowse instance no longer exists.
   *
   * @param $form
   *   The form array.
   * @param $form_state
   *   The form state holding the submitted values.
   */
  function tripal_gbrowse_align_features_form_submit ($form, &$form_state) {
    // Only the 'align' button triggers any work.
    if ($form_state['clicked_button']['#name'] != 'align') {
      return;
    }

    $values = $form_state['values'];
    $options = array();

    // Decode both groupings: form field name => prefix of the option keys.
    $groupings = array('query' => 'query', 'database' => 'db');
    foreach ($groupings as $field => $prefix) {
      $selected = isset($values[$field]) ? $values[$field] : '';
      // Anchored so that only a complete 'L123'/'A123' key is accepted.
      if (!preg_match('/^(L|A)(\d+)$/', $selected, $matches)) {
        drupal_set_message(t('Unable to queue the alignment: the selected @field is not a recognized library or analysis.', array('@field' => $field)), 'error');
        return;
      }
      $options[$prefix.'_type'] = ($matches[1] == 'L') ? 'library' : 'analysis';
      $options[$prefix.'_id'] = $matches[2];
    }

    $options['source'] = $values['source'];

    // Connection details of the GBrowse database to load into.
    $sql = 'SELECT * FROM {tripal_gbrowse_instances} WHERE gbrowse_id = %d';
    $instance = db_fetch_object(db_query($sql, $values['gbrowse_id']));
    if (!$instance) {
      drupal_set_message(t('Unable to queue the alignment: the selected GBrowse instance could not be found.'), 'error');
      return;
    }
    $options['dbname'] = $instance->database_name;
    $options['dbuser'] = $instance->database_user;
    $options['dbpass'] = $instance->user_password;

    // Program specific options and job callback.
    if ($values['program'] == 'blat') {
      $options['min_identity'] = $values['min_identity'];
      $callback = 'tripal_gbrowse_align_features_by_blat';
    }
    elseif ($values['program'] == 'blast') {
      $options['evalue'] = $values['evalue'];
      $options['gapped_alignment'] = $values['gapped_alignment'];
      $callback = 'tripal_gbrowse_align_features_by_blast';
    }
    else {
      // Unknown program: nothing to queue.
      return;
    }

    global $user;
    tripal_add_job(
      'Align '.$options['query_type'].' features ('.$options['query_type'].'_id='.$options['query_id'].')',
      'tripal_gbrowse',
      $callback,
      array(serialize($options)),
      $user->uid
    );
  }
  /**
   * Aligns features from a query library/analysis to a database library/analysis
   * using BLAST, saving the best hit of each query as GFF3 and then loading that
   * file into the selected gbrowse instance.
   *
   * Steps: export both groupings as FASTA, format the database FASTA with
   * formatdb, run blastall (blastn, XML output), pick for every query the hit
   * with the highest average HSP bit score, write a 'match' line plus one
   * 'match_part' line per HSP, and finally run bp_seqfeature_load.pl.
   *
   * @param $options
   *   A serialized array containing options needed to align features
   *   -query_type: the type of chado grouping containing query features (either library or analysis)
   *   -query_id: the library_id/analysis_id containing the query features
   *   -db_type: the type of chado grouping containing database features (either library or analysis)
   *   -db_id: the library_id/analysis_id containing the database features
   *   -source: the value written to the source column of the GFF3 file
   *   -evalue: the expectation value cutoff passed to blastall (-e)
   *   -gapped_alignment: whether blastall should perform a gapped alignment (-g)
   *   -dbname: name of the MySQL GBrowse database to load into
   *   -dbuser: name of the user with permission to load into the above database
   *   -dbpass: the password for the above user
   */
  function tripal_gbrowse_align_features_by_blast ($options) {
    // NOTE(review): the options come from the job queue as a serialized string;
    // unserialize() must only ever see data written by this module's own form.
    $options = unserialize($options);

    print 'Query: '.$options['query_type'].' where '.$options['query_type'].'_id='.$options['query_id']."\n";
    print 'Database: '.$options['db_type'].' where '.$options['db_type'].'_id='.$options['db_id']."\n";

    // Generate FASTA ---------------------------------------
    print "\nGenerating fasta files for query and database...\n";
    $query_file = tripal_gbrowse_export_fasta_for_features( $options['query_type'], $options['query_id'], TRUE );
    if (!$query_file) {
      print "ERROR: Unable to export the query features, aborting alignment.\n";
      return;
    }
    print "\t".$query_file['file']."\n";
    $db_file = tripal_gbrowse_export_fasta_for_features( $options['db_type'], $options['db_id'], TRUE, TRUE );
    if (!$db_file) {
      print "ERROR: Unable to export the database features, aborting alignment.\n";
      return;
    }
    print "\t".$db_file['file']."\n";

    // Align using BLAST ------------------------------------
    // Every value placed on a command line is escaped: several of them
    // (evalue, database credentials) originate from form input.
    print "\nAligning features using BLAST...\n";
    print "\tFormating Database FASTA into BLASTdb...\n";
    $db = '/tmp/exported_'.$options['db_type'].'_'.$options['db_id'];
    $formatdb_cmd = 'formatdb -n '.escapeshellarg($db).' -p F -i '.escapeshellarg($db_file['file']);
    print "\t\t".$formatdb_cmd."\n";
    exec($formatdb_cmd);

    print "\tExecuting BLAST...\n";
    $blast_outfile = $db.'_by_'.$options['query_type'].'_'.$options['query_id'].'.blast.xml';
    $blastall_cmd = 'blastall -p blastn -d '.escapeshellarg($db)
      .' -i '.escapeshellarg($query_file['file'])
      .' -m 7 -o '.escapeshellarg($blast_outfile)
      .' -e '.escapeshellarg($options['evalue']);
    // blastall's -g takes an explicit T/F value; pass it in both cases so that
    // unchecking the option really disables gapped alignment.
    $blastall_cmd .= ' -g '.(!empty($options['gapped_alignment']) ? 'T' : 'F');
    print "\t\t".$blastall_cmd."\n";
    exec($blastall_cmd);

    // Parse BLAST results ----------------------------------
    print "\nParsing Blast Results into GFF3...\n";
    $gff3_file = $db.'_by_'.$options['query_type'].'_'.$options['query_id'].'.gff3';
    $fgff3 = fopen($gff3_file, 'w');
    if (!$fgff3) {
      print "ERROR: Unable to open ".$gff3_file." for writing, aborting alignment.\n";
      return;
    }
    fwrite($fgff3, "##gff-version 3\n");

    // tripal_gbrowse_export_fasta_for_features() appends ' (based on <parent>)'
    // to the FASTA header of features that borrow their parent's sequence; strip
    // it again so names match the keys of noseq_features and stay clean in GFF3.
    $based_on_suffix = '/ \(based on .*\)$/';

    $iteration = tripal_gbrowse_get_next_xml_record ($blast_outfile, "<Iteration>");
    while ($iteration) {

      // Find the best Hit by looking at the bit scores of the hsps;
      // the larger the (average) bit score the better the alignment.
      $hits = array();
      foreach ($iteration['record']->Iteration_hits->Hit as $hit) {
        $score = 0.0;
        $num = 0;
        foreach ($hit->{'Hit_hsps'}->{'Hsp'} as $hsp) {
          // Explicit float cast: arithmetic directly on SimpleXML elements
          // truncated the value to an integer before PHP 7.3.
          $score += (float) $hsp->{'Hsp_bit-score'};
          $num++;
        }
        if ($num == 0) {
          // A hit without HSPs has no score and nothing to report.
          continue;
        }
        $hits[] = array('score' => round($score / $num, 2), 'hit' => $hit);
      }

      // Queries without any hit produce no GFF3 lines.
      if ($hits) {
        usort($hits, 'tripal_gbrowse_sort_by_score');
        $best_hit = $hits[0]['hit'];
        $best_score = $hits[0]['score'];

        $query_name = preg_replace($based_on_suffix, '', (string) $iteration['record']->{'Iteration_query-def'});
        $db_name = preg_replace($based_on_suffix, '', (string) $best_hit->Hit_def);

        // Database features without their own sequence were cut out of their
        // parent: report the alignment on the parent, shifted by the offset.
        if (isset($db_file['noseq_features'][$db_name])) {
          $db_offset = (int) $db_file['noseq_features'][$db_name]['start'];
          $db_name = $db_file['noseq_features'][$db_name]['parent']['uniquename'];
        }
        else {
          $db_offset = 0;
        }

        $lines = array();
        $hit_start = NULL;
        $hit_end = NULL;
        foreach ($best_hit->{'Hit_hsps'}->{'Hsp'} as $hsp) {
          $from = (int) $hsp->{'Hsp_hit-from'};
          $to = (int) $hsp->{'Hsp_hit-to'};
          // GFF3 requires start <= end, so order the two coordinates.
          // NOTE(review): the strand column is still written as '.', as before.
          $part_start = min($from, $to);
          $part_end = max($from, $to);
          if ($hit_start === NULL || $part_start < $hit_start) {
            $hit_start = $part_start;
          }
          if ($hit_end === NULL || $part_end > $hit_end) {
            $hit_end = $part_end;
          }

          $lines[] = implode("\t", array(
            $db_name,
            $options['source'],
            'match_part',
            $part_start + $db_offset,
            $part_end + $db_offset,
            (string) $hsp->{'Hsp_bit-score'},
            '.',
            '.',
            'ID='.$query_name.'_'.$hsp->{'Hsp_num'}.';Parent='.$query_name
          ))."\n";
        }

        // The match line spans all HSPs of the best hit and precedes its parts.
        fwrite($fgff3, implode("\t", array(
          $db_name,
          $options['source'],
          'match',
          $hit_start + $db_offset,
          $hit_end + $db_offset,
          $best_score,
          '.',
          '.',
          'ID='.$query_name.';Name='.$query_name
        ))."\n");
        fwrite($fgff3, implode('', $lines));
      }

      // Get the next iteration xml record, searching after the current one.
      $iteration = tripal_gbrowse_get_next_xml_record ($blast_outfile, "<Iteration>", $iteration['start_line_num']);
    }

    // Close the GFF3 file before another process reads it.
    fclose($fgff3);

    // Load into GBrowse ------------------------------------
    print "\nLoading GFF3 into GBrowse...\n";
    $load_prefix = 'bp_seqfeature_load.pl -u '.escapeshellarg($options['dbuser']).' -p ';
    $load_suffix = ' -d '.escapeshellarg($options['dbname']).' '.escapeshellarg($gff3_file);
    // The password is masked in the printed command so it does not end up in
    // the job output.
    print "\t".$load_prefix."'********'".$load_suffix."\n";
    exec($load_prefix.escapeshellarg($options['dbpass']).$load_suffix);
  }
  /**
   * Aligns features from a query library/analysis to a database library/analysis
   * using BLAT, saving the results as a GFF3 file and then loading it into the
   * selected gbrowse instance.
   *
   * Steps: export both groupings as FASTA, run blat (dnax vs dnax, PSL output
   * without header), convert every PSL line into a 'match' line plus one
   * 'match_part' line per alignment block, and run bp_seqfeature_load.pl.
   * Job progress is reported at 12, 25, 50, between 50 and 75 while parsing,
   * and 75 before loading.
   *
   * @param $options
   *   A serialized array containing options needed to align features
   *   -query_type: the type of chado grouping containing query features (either library or analysis)
   *   -query_id: the library_id/analysis_id containing the query features
   *   -db_type: the type of chado grouping containing database features (either library or analysis)
   *   -db_id: the library_id/analysis_id containing the database features
   *   -source: the value written to the source column of the GFF3 file
   *   -min_identity: the value passed to blat as -minIdentity
   *   -dbname: name of the MySQL GBrowse database to load into
   *   -dbuser: name of the user with permission to load into the above database
   *   -dbpass: the password for the above user
   * @param $job_id
   *   The id of the tripal job, used to report progress
   */
  function tripal_gbrowse_align_features_by_blat ($options,$job_id) {
    // NOTE(review): the options come from the job queue as a serialized string;
    // unserialize() must only ever see data written by this module's own form.
    $options = unserialize($options);

    print 'Query: '.$options['query_type'].' where '.$options['query_type'].'_id='.$options['query_id']."\n";
    print 'Database: '.$options['db_type'].' where '.$options['db_type'].'_id='.$options['db_id']."\n";

    // Generate FASTA ---------------------------------------
    print "\nGenerating fasta files for query and database...\n";
    $query_file = tripal_gbrowse_export_fasta_for_features( $options['query_type'], $options['query_id'], TRUE );
    if (!$query_file) {
      print "ERROR: Unable to export the query features, aborting alignment.\n";
      return;
    }
    print "\t".$query_file['file']."\n";
    tripal_job_set_progress($job_id, 12);

    $db_file = tripal_gbrowse_export_fasta_for_features( $options['db_type'], $options['db_id'], TRUE, TRUE );
    if (!$db_file) {
      print "ERROR: Unable to export the database features, aborting alignment.\n";
      return;
    }
    print "\t".$db_file['file']."\n";
    tripal_job_set_progress($job_id, 25);

    // Align using BLAT ------------------------------------
    // Every value placed on a command line is escaped: min_identity and the
    // database credentials originate from form input.
    print "\nAligning features using BLAT...\n";
    $out_base = '/tmp/alignment_'.$options['db_type'].'-'.$options['db_id'].'_by_'.$options['query_type'].'-'.$options['query_id'];
    $blat_outfile = $out_base.'.psl';
    $blat_cmd = 'blat '.escapeshellarg($db_file['file']).' '.escapeshellarg($query_file['file'])
      .' -q=dnax -t=dnax -noHead -minIdentity='.escapeshellarg($options['min_identity'])
      .' '.escapeshellarg($blat_outfile);
    print "\t\t".$blat_cmd."\n";
    exec($blat_cmd);
    tripal_job_set_progress($job_id, 50);

    // Parse BLAT results ----------------------------------
    $bh = fopen($blat_outfile, 'r');
    if (!$bh) {
      print "ERROR: Unable to read the BLAT output ".$blat_outfile.", aborting alignment.\n";
      return;
    }
    // Count the lines up front (needed for progress reporting), then rewind.
    $total_lines = 0;
    while (fgets($bh) !== FALSE) {
      $total_lines++;
    }
    rewind($bh);
    // Report progress roughly five times; never use a zero interval, which
    // would make the modulo below divide by zero on short files.
    $interval = max(1, intval($total_lines / 5));
    $num_lines = 0;
    $query_seq = array();
    print "\nParsing BLAT results into GFF3 (".$total_lines." lines)...\n";

    $gff3_file = $out_base.'.gff3';
    $fgff3 = fopen($gff3_file, 'w');
    if (!$fgff3) {
      fclose($bh);
      print "ERROR: Unable to open ".$gff3_file." for writing, aborting alignment.\n";
      return;
    }
    fwrite($fgff3, "##gff-version 3\n");

    while (($raw = fgets($bh)) !== FALSE) {
      $num_lines++;
      if (($num_lines % $interval) == 0) {
        // Parsing covers the 50% to 75% stretch of the job.
        tripal_job_set_progress($job_id, 50 + intval(25 * $num_lines / $total_lines));
      }

      // Columns used below: 8 strand, 9 query name, 13 target name,
      // 15 target start, 16 target end, 18 block sizes, 20 target block starts.
      $line = explode("\t", rtrim($raw, "\r\n"));
      if (count($line) < 21) {
        // Blank or truncated line: not an alignment record.
        continue;
      }

      $print_match = TRUE;

      // Database features without their own sequence were cut out of their
      // parent: report the alignment on the parent, shifted by the offset.
      $db_name = $line[13];
      if (isset($db_file['noseq_features'][$db_name])) {
        $db_offset = (int) $db_file['noseq_features'][$db_name]['start'];
        $db_name = $db_file['noseq_features'][$db_name]['parent']['uniquename'];
      }
      else {
        $db_offset = 0;
      }

      // NOTE(review): the PSL format documents target starts as 0-based while
      // GFF3 starts are 1-based; coordinates are passed through unchanged here,
      // exactly as before - confirm whether a +1 shift is needed.
      $match_start = (int) $line[15] + $db_offset;
      $match_end = (int) $line[16] + $db_offset;

      // Number the alignments of each query. An alignment starting within 5
      // bases of the query's first recorded start reuses the current number and
      // does not get its own match line.
      $query_name = $line[9];
      if (!isset($query_seq[$query_name])) {
        $query_seq[$query_name] = array(
          'id' => 0,
          'start' => $match_start,
          'end' => $match_end,
        );
      }
      elseif (abs($match_start - $query_seq[$query_name]['start']) < 5) {
        $print_match = FALSE;
      }
      else {
        $query_seq[$query_name]['id']++;
      }
      $query_id = $query_name.'_'.$query_seq[$query_name]['id'];

      // The second strand character is used for the GFF3 strand column.
      $strand = (strlen($line[8]) > 1) ? $line[8][1] : '.';

      // match line
      if ($print_match) {
        fwrite($fgff3, implode("\t", array(
          $db_name,
          $options['source'],
          'match',
          $match_start,
          $match_end,
          '.',
          $strand,
          '.',
          'ID='.$query_id.';Name='.$query_name
        ))."\n");
      }

      // match parts
      $parts_size = explode(',', trim($line[18]));
      $parts_start = explode(',', trim($line[20]));
      foreach ($parts_size as $k => $length) {
        // The comma separated lists end with a comma, leaving one empty entry;
        // compare against '' so that a block starting at 0 is not dropped.
        if (!isset($parts_start[$k]) || $parts_start[$k] === '' || $length === '') {
          continue;
        }
        $part_start = (int) $parts_start[$k] + $db_offset;
        fwrite($fgff3, implode("\t", array(
          $db_name,
          $options['source'],
          'match_part',
          $part_start,
          $part_start + (int) $length,
          '.',
          $strand,
          '.',
          'ID='.$query_id.'_'.$k.';Parent='.$query_id
        ))."\n");
      }
    }

    // Close both files before another process reads the GFF3.
    fclose($bh);
    fclose($fgff3);
    tripal_job_set_progress($job_id, 75);

    // Load into GBrowse ------------------------------------
    print "\nLoading GFF3 into GBrowse...\n";
    $load_prefix = 'bp_seqfeature_load.pl -u '.escapeshellarg($options['dbuser']).' -p ';
    $load_suffix = ' -d '.escapeshellarg($options['dbname']).' '.escapeshellarg($gff3_file);
    // The password is masked in the printed command so it does not end up in
    // the job output.
    print "\t".$load_prefix."'********'".$load_suffix."\n";
    exec($load_prefix.escapeshellarg($options['dbpass']).$load_suffix);
  }
  /**
   * Creates a fasta file for a given chado grouping of features
   *
   * Features with residues are written as-is. Features without residues that are
   * located (featureloc) on a parent feature with residues are written using the
   * fmin..fmax slice of the parent's residues, with ' (based on <parent>)'
   * appended to their FASTA header.
   *
   * @param $type
   *   The type of chado grouping. Allowed values are either library or analysis
   * @param $id
   *   The library_id/analysis_id of the chado grouping
   * @param $use_parent_seq
   *   Currently not read by this function: parent sequence is always used for
   *   features without residues.
   * @param $save_offset
   *   If TRUE, the location on the parent is recorded for every feature written
   *   from its parent's sequence (see noseq_features below)
   *
   * @return
   *   FALSE if $type is not recognized or the file cannot be written, otherwise
   *   an array with
   *   -file: the name of the multi-fasta file written under /tmp
   *   -noseq_features: an array keyed by feature uniquename with 'start' (fmin),
   *    'end' (fmax) and 'parent' => array('uniquename' => ...); only filled when
   *    $save_offset is TRUE
   */
  function tripal_gbrowse_export_fasta_for_features ($type, $id, $use_parent_seq = FALSE, $save_offset = FALSE) {

    // All features of the grouping, with their location on a parent (if any).
    $sql = 'SELECT f.uniquename, f.residues, fl.fmin, fl.fmax, fl.srcfeature_id as parent_feature_id FROM feature f '
      .'LEFT JOIN featureloc fl ON fl.feature_id=f.feature_id ';
    // All parents on which a residue-less feature of the grouping is located.
    $parent_sql = 'SELECT p.feature_id as parent_feature_id, p.uniquename as parent_uniquename, count(fl.feature_id) FROM feature p '
      .'LEFT JOIN featureloc fl ON fl.srcfeature_id=p.feature_id '
      ."WHERE fl.feature_id IN (SELECT feature_id FROM feature WHERE residues='') AND ";

    // Validate the type before touching the filesystem, so that the error path
    // neither creates an empty file nor leaves a handle open.
    if ($type == 'library') {
      $sql .= 'LEFT JOIN library_feature lf ON f.feature_id=lf.feature_id WHERE lf.library_id=%d';
      $parent_sql .= 'fl.feature_id IN (SELECT feature_id FROM library_feature WHERE library_id=%d) GROUP BY p.uniquename, p.feature_id';
    }
    elseif ($type == 'analysis') {
      $sql .= 'LEFT JOIN analysisfeature af ON f.feature_id=af.feature_id WHERE af.analysis_id=%d';
      $parent_sql .= 'fl.feature_id IN (SELECT feature_id FROM analysisfeature WHERE analysis_id=%d) GROUP BY p.uniquename, p.feature_id';
    }
    else {
      print "ERROR: Unable to generate FASTA due to unrecognized type -".$type."!\n";
      return FALSE;
    }

    $fasta_file = '/tmp/exported_fasta-'.$type.'-'.$id.'.fasta';
    $fh = fopen($fasta_file, 'w');
    if (!$fh) {
      print "ERROR: Unable to open ".$fasta_file." for writing!\n";
      return FALSE;
    }

    // Index the residues of every parent that a residue-less feature sits on.
    $parent_seq = array();
    $residues_sql = 'SELECT residues FROM feature WHERE feature_id=%d';
    $resource = db_query($parent_sql, $id);
    while ($r = db_fetch_object($resource)) {
      $parent = db_fetch_object(db_query($residues_sql, $r->parent_feature_id));
      $parent_seq[ $r->parent_feature_id ] = array(
        'residues' => $parent ? $parent->residues : '',
        'uniquename' => $r->parent_uniquename
      );
    }

    // Write one FASTA record per feature that has, or can borrow, a sequence.
    $noseq_features = array();
    $resource = db_query($sql, $id);
    while ($r = db_fetch_object($resource)) {
      if (!empty($r->residues)) {
        fwrite($fh, '>'.$r->uniquename."\n");
        fwrite($fh, wordwrap($r->residues, 80, "\n", TRUE)."\n");
        continue;
      }

      // No residues of its own: fall back to the slice of the parent.
      if (empty($parent_seq[$r->parent_feature_id]['residues'])) {
        continue;
      }
      $parent = $parent_seq[$r->parent_feature_id];
      $seq = substr($parent['residues'], $r->fmin, $r->fmax - $r->fmin);
      if ($seq === FALSE || $seq === '') {
        // Location outside the parent sequence or of zero length: a record
        // without sequence would be useless to the alignment programs.
        continue;
      }
      fwrite($fh, '>'.$r->uniquename." (based on ".$parent['uniquename'].")\n");
      fwrite($fh, wordwrap($seq, 80, "\n", TRUE)."\n");

      if ($save_offset) {
        $noseq_features[ $r->uniquename ] = array(
          'start' => $r->fmin,
          'end' => $r->fmax,
          'parent' => array(
            'uniquename' => $parent['uniquename'],
          ),
        );
      }
    }
    fclose($fh);

    return array(
      'file' => $fasta_file,
      'noseq_features' => $noseq_features
    );
  }
  /**
   * Retrieves the next xml record with $record_identifier
   *
   * The file is scanned line by line for the first line containing
   * $record_identifier after $last_record_line_num. The record ends at the first
   * following line containing the matching closing tag or, failing that, at the
   * line before the next opening tag. Ending at the closing tag means the last
   * record of a file is returned as well.
   *
   * Assumption: a record's opening and closing tags are not on the same line as
   * another record's tags.
   *
   * @param $xml_file
   *   The file containing the xml records; must supply the full path
   * @param $record_identifier
   *   The opening tag enclosing a record (ie: <Iteration>)
   * @param $last_record_line_num
   *   The line number of the opening tag for the last record; leave empty to
   *   retrieve the first record
   *
   * @return
   *   FALSE if the file cannot be read or contains no further complete record,
   *   otherwise an array describing the next xml record
   *   -record: the simpleXML record
   *   -start_line_num: the line number in the file that this record starts at
   *   -end_line_num: the line number in the file that this record ends at
   *
   * @throws Exception
   *   Raised by SimpleXMLElement when the extracted lines are not well-formed XML
   */
  function tripal_gbrowse_get_next_xml_record ($xml_file, $record_identifier, $last_record_line_num = NULL) {

    if (!is_readable($xml_file)) {
      return FALSE;
    }
    $handle = fopen($xml_file, 'r');
    if (!$handle) {
      return FALSE;
    }

    // Derive the closing tag from the opening one: '<Iteration>' => '</Iteration>'.
    $closing_tag = NULL;
    if (preg_match('/^<([^\s>\/]+)/', $record_identifier, $matches)) {
      $closing_tag = '</'.$matches[1].'>';
    }

    // Lines up to and including the previous record's opening tag are skipped.
    $skip = $last_record_line_num ? (int) $last_record_line_num : 0;

    $line_num = 0;
    $start = NULL;
    $end = NULL;
    $complete = FALSE;
    $xml = array();
    while (($line = fgets($handle)) !== FALSE) {
      $line_num++;
      if ($line_num <= $skip) {
        continue;
      }

      $is_opening = (strpos($line, $record_identifier) !== FALSE);
      if ($start === NULL) {
        // Still looking for the start of the next record.
        if (!$is_opening) {
          continue;
        }
        $start = $line_num;
      }
      elseif ($is_opening) {
        // Opening tag of the following record: the previous line ended ours.
        $complete = TRUE;
        break;
      }

      $xml[] = rtrim($line, "\r\n");
      $end = $line_num;

      if ($closing_tag !== NULL && strpos($line, $closing_tag) !== FALSE) {
        $complete = TRUE;
        break;
      }
    }
    fclose($handle);

    // No (further) record, or one that is cut off by the end of the file.
    if (!$complete || !$xml) {
      return FALSE;
    }

    $xml_record = new SimpleXMLElement(implode("\n", $xml));

    return array(
      'record' => $xml_record,
      'start_line_num' => $start,
      'end_line_num' => $end,
    );
  }
  /**
   * usort() comparison callback ordering entries by 'score', highest first.
   *
   * Each entry is an array( 'score' => number, 'hit' => simplexml obj); only
   * the 'score' key is compared.
   *
   * @return
   *   0 when both scores are equal, 1 when $a scores lower than $b, -1 otherwise
   */
  function tripal_gbrowse_sort_by_score ($a, $b) {
    $left = $a['score'];
    $right = $b['score'];

    // Ties keep no particular order.
    if ($left == $right) {
      return 0;
    }

    // A lower score sorts after a higher one.
    return ($left < $right) ? 1 : -1;
  }