tripal_feature.seq_extract.inc 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563
  1. <?php
  2. /**
  3. * @file
  4. * Interface for downloading feature sequences
  5. */
  6. /**
  7. * The page allowing users to download feature sequences
  8. *
  9. * @ingroup tripal_feature
  10. */
  11. function tripal_feature_seq_extract_page() {
  12. if ($_SESSION['tripal_feature_seq_extract']['download']) {
  13. $genus = $_SESSION['tripal_feature_seq_extract']['genus'];
  14. $species = $_SESSION['tripal_feature_seq_extract']['species'];
  15. $analysis = $_SESSION['tripal_feature_seq_extract']['analysis'];
  16. $ftype = $_SESSION['tripal_feature_seq_extract']['ftype'];
  17. $fnames = $_SESSION['tripal_feature_seq_extract']['fnames'];
  18. $upstream = $_SESSION['tripal_feature_seq_extract']['upstream'];
  19. $downstream = $_SESSION['tripal_feature_seq_extract']['downstream'];
  20. $format = $_SESSION['tripal_feature_seq_extract']['format'];
  21. $use_parent = $_SESSION['tripal_feature_seq_extract']['use_parent'];
  22. $aggregate = $_SESSION['tripal_feature_seq_extract']['aggregate'];
  23. $agg_types = $_SESSION['tripal_feature_seq_extract']['agg_types'];
  24. unset($_SESSION['tripal_feature_seq_extract']['download']);
  25. if ($format == 'fasta_html') {
  26. drupal_add_http_header('Content-Type: text/html');
  27. }
  28. else {
  29. drupal_add_http_header('Content-Type: text');
  30. drupal_add_http_header('Content-Disposition: attachment; filename="sequences.fasta.txt"');
  31. }
  32. tripal_feature_seq_extract_get_features(NULL, $genus, $species, $analysis,
  33. $ftype, $fnames['items_array'], $upstream, $downstream, $format, $use_parent, $aggregate,
  34. $agg_types);
  35. return;
  36. }
  37. // generate the search form
  38. $output .= '';
  39. if (user_access('access administration pages')) {
  40. $output .= theme('tripal_admin_message', array('message' => "
  41. Administrators, the " . l('organism_feature_count', 'admin/tripal/schema/mviews') . " and
  42. " . l('analysis_organism', 'admin/tripal/schema/mviews') . " materialized
  43. views must be populated before using this form. Those views should be re-populated
  44. when new data is added."
  45. ));
  46. }
  47. $output .= "<div id=\"tripal-feature-seq-extract-form-block\">";
  48. $output .= drupal_get_form('tripal_feature_seq_extract_form');
  49. $output .= "</div>";
  50. return $output;
  51. }
  52. /**
  53. * Theme the Form to choose which features to extract sequence for
  54. *
  55. * @ingroup tripal_feature
  56. */
  57. function theme_tripal_feature_seq_extract_form($form) {
  58. $rows = array(
  59. 0 => array(
  60. array('data' => drupal_render($form['description']), 'colspan' => 3),
  61. ),
  62. 1 => array(
  63. drupal_render($form['genus']),
  64. drupal_render($form['species']) ,
  65. drupal_render($form['ftype']),
  66. ),
  67. 2 => array(
  68. array('data' => drupal_render($form['analysis']), 'colspan' => 3),
  69. //drupal_render($form['format']),
  70. ),
  71. 3 => array(
  72. array('data' => drupal_render($form['fnames']), 'colspan' => 2),
  73. drupal_render($form['upstream']) . drupal_render($form['downstream']) ,
  74. ),
  75. 4 => array(
  76. array(
  77. 'data' => drupal_render($form['advanced']),
  78. 'colspan' => 3,
  79. ),
  80. ),
  81. 5 => array(
  82. array(
  83. 'data' => drupal_render($form['retrieve_btn']) . drupal_render($form['reset_btn']),
  84. 'colspan' => 3,
  85. ),
  86. ),
  87. );
  88. $headers = array();
  89. $table = theme('table', $headers, $rows, array('id' => 'tripal-feature-seq-extract-form-table', 'border' => '0'));
  90. $markup .= $table;
  91. $form['criteria'] = array(
  92. '#type' => 'markup',
  93. '#value' => $markup,
  94. '#weight' => -10,
  95. );
  96. return drupal_render($form);
  97. }
  98. /**
  99. * Form to choose which features to extract sequence for
  100. *
  101. * @ingroup tripal_feature
  102. */
  103. function tripal_feature_seq_extract_form(&$form_state = NULL) {
  104. tripal_core_ahah_init_form();
  105. // we want to allow the query string to provide values for the form
  106. if ($_GET['fnames']) {
  107. $form_state['values']['fnames']['items'] = $_GET['fnames'];
  108. }
  109. if ($_GET['genus']) {
  110. $form_state['values']['genus'] = $_GET['genus'];
  111. }
  112. if ($_GET['species']) {
  113. $form_state['values']['species'] = $_GET['species'];
  114. }
  115. if ($_GET['ftype']) {
  116. $form_state['values']['ftype'] = $_GET['ftype'];
  117. }
  118. if ($_GET['analysis']) {
  119. $form_state['values']['analysis'] = $_GET['analysis'];
  120. }
  121. if ($_GET['upstream']) {
  122. $form_state['values']['upstream'] = $_GET['upstream'];
  123. }
  124. if ($_GET['downstream']) {
  125. $form_state['values']['downstream'] = $_GET['downstream'];
  126. }
  127. if ($_GET['use_parent']) {
  128. $form_state['values']['use_parent'] = $_GET['use_parent'];
  129. }
  130. if ($_GET['aggregate']) {
  131. $form_state['values']['aggregate'] = $_GET['aggregate'];
  132. }
  133. if ($_GET['agg_types']) {
  134. $form_state['values']['agg_types'] = $_GET['agg_types'];
  135. }
  136. // get defaults
  137. $dgenus = isset($form_state['values']['genus']) ? $form_state['values']['genus'] : $_SESSION['tripal_feature_seq_extract']['genus'];
  138. $dspecies = isset($form_state['values']['species']) ? $form_state['values']['species'] : $_SESSION['tripal_feature_seq_extract']['species'];
  139. $danalysis = isset($form_state['values']['analysis']) ? $form_state['values']['analysis'] : $_SESSION['tripal_feature_seq_extract']['analysis'];
  140. $dftype = isset($form_state['values']['ftype']) ? $form_state['values']['ftype'] : $_SESSION['tripal_feature_seq_extract']['ftype'];
  141. $dfnames = isset($form_state['values']['fnames']) ? $form_state['values']['fnames'] : $_SESSION['tripal_feature_seq_extract']['fnames'];
  142. $dupstream = isset($form_state['values']['upstream']) ? $form_state['values']['upstream'] : $_SESSION['tripal_feature_seq_extract']['upstream'];
  143. $ddownstream = isset($form_state['values']['downstream']) ? $form_state['values']['downstream'] : $_SESSION['tripal_feature_seq_extract']['downstream'];
  144. $dformat = isset($form_state['values']['format']) ? $form_state['values']['format'] : 'fasta_txt';
  145. $duse_parent = isset($form_state['values']['use_parent']) ? $form_state['values']['use_parent'] : $_SESSION['tripal_feature_seq_extract']['use_parent'];
  146. $daggregate = isset($form_state['values']['aggregate']) ? $form_state['values']['aggregate'] : $_SESSION['tripal_feature_seq_extract']['aggregate'];
  147. $dagg_types = isset($form_state['values']['agg_types']) ? $form_state['values']['agg_types'] : $_SESSION['tripal_feature_seq_extract']['agg_types'];
  148. $form = array();
  149. // because we're using Tripal's file_upload_combo form element we
  150. // need to allow the form to upload files
  151. $form['#attributes']['enctype'] = 'multipart/form-data';
  152. $form['#method'] = 'POST';
  153. $form['description'] = array(
  154. '#type' => 'markup',
  155. '#value' => t('Use this form to retrieve sequences in FASTA format.')
  156. );
  157. $sql = "
  158. SELECT DISTINCT genus
  159. FROM {organism}
  160. ORDER BY genus
  161. ";
  162. $results = chado_query($sql);
  163. $genus = array();
  164. $genus[] = '';
  165. while ($organism = $results->fetchObject()) {
  166. $genus[$organism->genus] = $organism->genus;
  167. }
  168. $form['genus'] = array(
  169. '#title' => t('Genus'),
  170. '#type' => 'select',
  171. '#options' => $genus,
  172. '#default_value' => $dgenus,
  173. '#multiple' => FALSE,
  174. '#description' => t('The organism\'s genus. If specified, features for all organism with this genus will be retrieved.'),
  175. '#ahah' => array(
  176. 'path' => 'find/sequences/ajax',
  177. 'wrapper' => 'tripal-feature-seq-extract-form-block',
  178. 'event' => 'change',
  179. 'method' => 'replace',
  180. ),
  181. );
  182. $species = array();
  183. $species[] = '';
  184. if ($dgenus) {
  185. $sql = "
  186. SELECT DISTINCT species
  187. FROM {organism}
  188. WHERE genus = :genus
  189. ORDER BY species
  190. ";
  191. $results = chado_query($sql, array(':genus' => $dgenus));
  192. while ($organism = $results->fetchObject()) {
  193. $species[$organism->species] = $organism->species;
  194. }
  195. }
  196. $form['species'] = array(
  197. '#title' => t('Species'),
  198. '#type' => 'select',
  199. '#options' => $species,
  200. '#default_value' => $dspecies,
  201. '#multiple' => FALSE,
  202. '#description' => t('The organism\'s species name. If specified, features for all organisms with this species will be retrieved. Please first select a genus'),
  203. '#ahah' => array(
  204. 'path' => 'find/sequences/ajax',
  205. 'wrapper' => 'tripal-feature-seq-extract-form-block',
  206. 'event' => 'change',
  207. 'method' => 'replace',
  208. ),
  209. );
  210. $analyses = array();
  211. $analyses[] = '';
  212. if ($dgenus) {
  213. $sql = "
  214. SELECT DISTINCT A.analysis_id, A.name
  215. FROM {analysis_organism} AO
  216. INNER JOIN {analysis} A ON A.analysis_id = AO.analysis_id
  217. INNER JOIN {organism} O ON O.organism_id = AO.organism_id
  218. WHERE O.genus = :genus
  219. ";
  220. $args = array();
  221. $args[':genus'] = $dgenus;
  222. if ($dspecies) {
  223. $sql .= " AND O.species = :species ";
  224. $args[':species'] = $dspecies;
  225. }
  226. $sql .=" ORDER BY A.name ";
  227. $results = chado_query($sql, $args);
  228. while ($analysis = $results->fetchObject()) {
  229. $analyses[$analysis->name] = $analysis->name;
  230. }
  231. }
  232. $form['analysis'] = array(
  233. '#title' => t('Analyses'),
  234. '#type' => 'select',
  235. '#options' => $analyses,
  236. '#default_value' => $danalysis,
  237. '#multiple' => FALSE,
  238. '#description' => t('You can limit sequences by the analyses to which it was derived or was used. If specified, only features associated with the specific analysis will be retrieved.'),
  239. );
  240. $ftype = array();
  241. $ftype[] = '';
  242. if ($dgenus) {
  243. $sql = "
  244. SELECT DISTINCT OFC.cvterm_id, OFC.feature_type
  245. FROM {organism_feature_count} OFC
  246. WHERE OFC.genus = :genus
  247. ";
  248. $args = array();
  249. $args['genus'] = $dgenus;
  250. if ($dspecies) {
  251. $sql .= " AND OFC.species = :species";
  252. $args['species'] = $dspecies;
  253. }
  254. $sql .= " ORDER BY OFC.feature_type ";
  255. $results = chado_query($sql, $args);
  256. while ($type = $results->fetchObject()) {
  257. $ftype[$type->feature_type] = $type->feature_type;
  258. }
  259. }
  260. $form['ftype'] = array(
  261. '#title' => t('Feature Type'),
  262. '#type' => 'select',
  263. '#options' => $ftype,
  264. '#multiple' => FALSE,
  265. '#default_value' => $dftype,
  266. '#description' => t('The type of feature to retrieve (e.g. mRNA). All features that match this type will be retrieved.'),
  267. );
  268. $form['fnames'] = array(
  269. '#title' => t('Feature Name'),
  270. '#type' => 'file_upload_combo',
  271. '#default_value' => $dfnames,
  272. '#description' => t('The names of the features to retrieve. Separate each with a new line or comma. Leave blank to retrieve all features matching other criteria.'),
  273. '#rows' => 8
  274. );
  275. $form['upstream'] = array(
  276. '#title' => t('Upstream Bases'),
  277. '#type' => 'textfield',
  278. '#description' => t('A numeric value specifying the number of upstream bases to include. Only works if the feature is aligned to a larger sequence.'),
  279. '#default_value' => $dupstream,
  280. '#size' => 5,
  281. );
  282. $form['downstream'] = array(
  283. '#title' => t('Downstream Bases'),
  284. '#type' => 'textfield',
  285. '#description' => t('A numeric value specifying the number of downstream bases to incldue. Only works if the feature is aligned to a larger sequence.'),
  286. '#default_value' => $ddownstream,
  287. '#size' => 5,
  288. );
  289. $form['format'] = array(
  290. '#title' => t('Output Format'),
  291. '#type' => 'hidden',
  292. '#default_value' => $dformat,
  293. '#options' => array(
  294. 'fasta_html' => 'FASTA (in browser)',
  295. 'fasta_txt' => 'FASTA (download)',
  296. ),
  297. );
  298. $form['advanced'] = array(
  299. '#type' => 'fieldset',
  300. '#title' => 'Advanced',
  301. '#collapsible' => TRUE,
  302. '#collapsed' => TRUE
  303. );
  304. $form['advanced']['use_parent'] = array(
  305. '#title' => t('Use Parent'),
  306. '#type' => 'checkbox',
  307. '#default_value' => $duse_parent,
  308. '#description' => t('Check this box to retrieve the sequence from the parent in an alignment rather than the feature itself. This is useful if the same feature is aligned to multiple parents and you would like to retrieve the underlying sequence from each parent.'),
  309. );
  310. $form['advanced']['aggregate'] = array(
  311. '#title' => t('Aggregate'),
  312. '#type' => 'checkbox',
  313. '#default_value' => $daggregate,
  314. '#description' => t('Check this box to aggregate sub features into a single sequence. This is useful, for example, for obtaining CDS sequence from an mRNA. Rather than retrieve the mRNA sequence, the sub features of the mRNA will be aggregated and that will be returned.')
  315. );
  316. $form['advanced']['agg_types'] = array(
  317. '#title' => t('Types to aggregate'),
  318. '#type' => 'textarea',
  319. '#default_value' => $dagg_types,
  320. '#description' => t('Set this argument to the type of children to aggregate. This is useful in the case where a gene has exons, CDSs and UTRs. In this case, you may only want to aggregate CDSs and exclude exons. If you want to aggregate both CDSs and UTRs you could specify both.')
  321. );
  322. $form['retrieve_btn'] = array(
  323. '#type' => 'submit',
  324. '#value' => 'Retrieve',
  325. );
  326. $form['reset_btn'] = array(
  327. '#type' => 'submit',
  328. '#value' => 'Reset',
  329. );
  330. return $form;
  331. }
  332. /**
  333. * AJAX callback
  334. *
  335. * @ingroup tripal_feature
  336. */
  337. function tripal_feature_seq_extract_form_ahah_update() {
  338. $status = TRUE;
  339. // prepare and render the form. If no form is returned that means
  340. // we got here by an AHAH call after the form has been submitted. This
  341. // is possible because results are downloaded and the page is not refreshed
  342. // but the form_id goes away. So, we need to rebuild the form in this case.
  343. // otherwise, if the form already exists we just theme it.
  344. $form = tripal_core_ahah_prepare_form();
  345. if ($form) {
  346. $data = theme('tripal_feature_seq_extract_form', $form);
  347. }
  348. else {
  349. $data = drupal_get_form('tripal_feature_seq_extract_form');
  350. }
  351. // bind javascript events to the new objects that will be returned
  352. // so that AHAH enabled elements will work.
  353. $settings = tripal_core_ahah_bind_events();
  354. // return the updated JSON
  355. drupal_json(
  356. array(
  357. 'status' => $status,
  358. 'data' => $data,
  359. 'settings' => $settings,
  360. )
  361. );
  362. }
  363. /**
  364. * Validate the extract sequence form
  365. *
  366. * @ingroup tripal_feature
  367. */
  368. function tripal_feature_seq_extract_form_validate($form, &$form_state) {
  369. $genus = $form_state['values']['genus'];
  370. $species = $form_state['values']['species'];
  371. $analysis = $form_state['values']['analysis'];
  372. $ftype = $form_state['values']['ftype'];
  373. $fnames = $form_state['values']['fnames'];
  374. $upstream = $form_state['values']['upstream'];
  375. $downstream = $form_state['values']['downstream'];
  376. $format = $form_state['values']['format'];
  377. $use_parent = $form_state['values']['use_parent'];
  378. $aggregate = $form_state['values']['aggregate'];
  379. $agg_types = $form_state['values']['agg_types'];
  380. $op = $form_state['values']['op'];
  381. if ($op == 'Retrieve') {
  382. if ($upstream and !preg_match('/^\d+$/', $upstream)) {
  383. form_set_error('upstream', 'Please enter a positive numeric value for the upstream bases');
  384. }
  385. if ($downstream and !preg_match('/^\d+$/', $downstream)) {
  386. form_set_error('downstream', 'Please enter a positive numeric value for the downstream bases');
  387. }
  388. if (!$genus and !$species and !$ftype and !$fnames) {
  389. form_set_error('', 'Please provide a feature name, a feature type or a genus.');
  390. }
  391. }
  392. }
  393. /**
  394. * Submit the extract sequence form
  395. *
  396. * @ingroup tripal_feature
  397. */
  398. function tripal_feature_seq_extract_form_submit($form, &$form_state) {
  399. $genus = $form_state['values']['genus'];
  400. $species = $form_state['values']['species'];
  401. $analysis = $form_state['values']['analysis'];
  402. $ftype = $form_state['values']['ftype'];
  403. $fnames = $form_state['values']['fnames'];
  404. $upstream = $form_state['values']['upstream'];
  405. $downstream = $form_state['values']['downstream'];
  406. $format = $form_state['values']['format'];
  407. $use_parent = $form_state['values']['use_parent'];
  408. $aggregate = $form_state['values']['aggregate'];
  409. $agg_types = $form_state['values']['agg_types'];
  410. $op = $form_state['values']['op'];
  411. // we must use the parent sequence if the user has selected
  412. // the upstream, downstream or to aggregate
  413. if ($upstream or $downstream or $aggregate) {
  414. $use_parent = 1;
  415. }
  416. if ($op == 'Retrieve') {
  417. $_SESSION['tripal_feature_seq_extract']['genus'] = $genus;
  418. $_SESSION['tripal_feature_seq_extract']['species'] = $species;
  419. $_SESSION['tripal_feature_seq_extract']['analysis'] = $analysis;
  420. $_SESSION['tripal_feature_seq_extract']['ftype'] = $ftype;
  421. $_SESSION['tripal_feature_seq_extract']['fnames'] = $fnames;
  422. $_SESSION['tripal_feature_seq_extract']['upstream'] = $upstream;
  423. $_SESSION['tripal_feature_seq_extract']['downstream'] = $downstream;
  424. $_SESSION['tripal_feature_seq_extract']['format'] = $format;
  425. $_SESSION['tripal_feature_seq_extract']['use_parent'] = $use_parent;
  426. $_SESSION['tripal_feature_seq_extract']['aggregate'] = $aggregate;
  427. $_SESSION['tripal_feature_seq_extract']['agg_types'] = $agg_types;
  428. $_SESSION['tripal_feature_seq_extract']['download'] = 1;
  429. }
  430. if ($op == 'Reset') {
  431. unset($_SESSION['tripal_feature_seq_extract']);
  432. }
  433. }
  434. /**
  435. * Actually extract the sequences
  436. *
  437. * @ingroup tripal_feature
  438. */
  439. function tripal_feature_seq_extract_get_features($org_commonname, $genus, $species, $analysis_name,
  440. $type, $feature_name, $upstream, $downstream, $output_format, $derive_from_parent, $aggregate,
  441. $child, $relationship, $rel_part) {
  442. $sub_features = explode(',', $child);
  443. if (!$output_format) {
  444. $output_format = 'fasta_txt';
  445. }
  446. if (!$type and !$feature_name and !$genus) {
  447. print "Please provide a type, feature name or genus\n";
  448. return;
  449. }
  450. // get the list of features
  451. $vars = array();
  452. $sql = "SELECT DISTINCT F.feature_id, F.name, F.uniquename, O.genus, O.species, CVT.name as feature_type " .
  453. "FROM {feature} F " .
  454. " INNER JOIN {organism} O on O.organism_id = F.organism_id " .
  455. " INNER JOIN {cvterm} CVT on CVT.cvterm_id = F.type_id ";
  456. if ($analysis_name) {
  457. $sql .= " INNER JOIN {analysisfeature} AF on AF.feature_id = F.feature_id " .
  458. " INNER JOIN {analysis} A on AF.analysis_id = A.analysis_id ";
  459. }
  460. $sql .= "WHERE (1=1) ";
  461. if ($org_commonname) {
  462. $sql .= "AND O.common_name = :common_name ";
  463. $vars[':common_name'] = $org_commonname;
  464. }
  465. if ($genus) {
  466. $sql .= "AND O.genus = :genus ";
  467. $vars[':genus'] = $genus;
  468. }
  469. if ($species) {
  470. $sql .= "AND O.species = :species ";
  471. $vars[':species'] = $species;
  472. }
  473. if ($type) {
  474. $sql .= "AND CVT.name = ':cvtname ";
  475. $vars[':cvtname'] = $type;
  476. }
  477. if ($feature_name) {
  478. if (is_array($feature_name)) {
  479. $sql .= "AND F.name IN (";
  480. foreach ($feature_name as $i => $fname) {
  481. $sql .= ":fname$i, ";
  482. $vars[":fname$i"] = $fname;
  483. }
  484. // remove the trailing comma and close the paren
  485. $sql = substr($sql, 0, -2) . ")";
  486. }
  487. else {
  488. $sql .= "AND F.name = :fname";
  489. $vars[':fname'] = $feature_name;
  490. }
  491. }
  492. if ($analysis_name) {
  493. $sql .= "AND A.name = :aname";
  494. $vars[':aname'] = $analysis_name;
  495. }
  496. $num_bases_per_line = 50;
  497. $num_seqs = 0;
  498. $q = chado_query($sql, $vars);
  499. while ($feature = $q->fetchObject()) {
  500. $feature_id = $feature->feature_id;
  501. // build the header for each FASTA entry
  502. if ($feature->uniquename == $feature->name) {
  503. $feature_name = "$feature->uniquename $feature->feature_type ($feature->genus $feature->species)";
  504. }
  505. else {
  506. $feature_name = "$feature->uniquename $feature->name $feature->feature_type ($feature->genus $feature->species)";
  507. }
  508. // generate the sequence
  509. $sequence = tripal_feature_get_formatted_sequence($feature_id, $feature_name,
  510. $num_bases_per_line, $derive_from_parent, $aggregate, $output_format,
  511. $upstream, $downstream, $sub_features, $relationship, $rel_part);
  512. // print the sequence
  513. print $sequence;
  514. $num_seqs++;
  515. }
  516. if ($num_seqs == 0) {
  517. print "No Sequences Found";
  518. }
  519. }