tripal_chado.seq_extract.inc 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506
  1. <?php
  2. /**
  3. * @file
  4. * Interface for downloading feature sequences
  5. */
  6. /**
  7. * The page allowing users to download feature sequences
  8. *
  9. * @ingroup tripal_chado_feature
  10. */
  11. function tripal_chado_feature_seq_extract_download() {
  12. if (!array_key_exists('tripal_feature_seq_extract', $_SESSION)) {
  13. drupal_goto('find/sequences');
  14. }
  15. $genus = $_SESSION['tripal_feature_seq_extract']['genus'];
  16. $species = $_SESSION['tripal_feature_seq_extract']['species'];
  17. $analysis = $_SESSION['tripal_feature_seq_extract']['analysis'];
  18. $ftype = $_SESSION['tripal_feature_seq_extract']['ftype'];
  19. $fnames = $_SESSION['tripal_feature_seq_extract']['fnames'];
  20. $upstream = $_SESSION['tripal_feature_seq_extract']['upstream'];
  21. $downstream = $_SESSION['tripal_feature_seq_extract']['downstream'];
  22. $format = $_SESSION['tripal_feature_seq_extract']['format'];
  23. $use_parent = $_SESSION['tripal_feature_seq_extract']['use_parent'];
  24. $aggregate = $_SESSION['tripal_feature_seq_extract']['aggregate'];
  25. $agg_types = $_SESSION['tripal_feature_seq_extract']['agg_types'];
  26. // Split the sub features and remove any surrounding white space
  27. $agg_types = preg_split("/[\n|,]/", $agg_types);
  28. for ($i = 0; $i < count($agg_types); $i++) {
  29. $agg_types[$i] = trim($agg_types[$i]);
  30. }
  31. header('Content-Type: text; utf-8');
  32. if ($ftype == 'polypeptide') {
  33. header('Content-Disposition: attachment; filename="sequences.fna"');
  34. }
  35. else {
  36. header('Content-Disposition: attachment; filename="sequences.fnn"');
  37. }
  38. $seqs = chado_get_bulk_feature_sequences(array(
  39. 'genus' => $genus,
  40. 'species' => $species,
  41. 'analysis_name' => $analysis,
  42. 'type' => $ftype,
  43. 'feature_name' => $fnames['items_array'],
  44. 'upstream' => $upstream,
  45. 'downstream' => $downstream,
  46. 'output_format' => $format,
  47. 'derive_from_parent' => $use_parent,
  48. 'aggregate' => $aggregate,
  49. 'sub_feature_types' => $agg_types,
  50. 'width' => 60
  51. ));
  52. if (count($seqs) == 0) {
  53. print ">No sequences found that match the criteria.";
  54. }
  55. foreach ($seqs as $seq) {
  56. print ">" . $seq['defline'] . "\r\n";
  57. print $seq['residues'] . "\r\n";
  58. }
  59. }
  60. /**
  61. * Form to choose which features to extract sequence for
  62. *
  63. * @ingroup tripal_chado_feature
  64. */
  65. function tripal_chado_feature_seq_extract_form($form, &$form_state) {
  66. $form['#true'] = TRUE;
  67. // Intialize the defaults
  68. $dgenus = '';
  69. $dspecies = '';
  70. $danalysis = '';
  71. $dftype = '';
  72. $dfnames = '';
  73. $dupstream = '';
  74. $ddownstream = '';
  75. $duse_parent = '';
  76. $daggregate = '';
  77. $dagg_types = '';
  78. if (array_key_exists('tripal_feature_seq_extract', $_SESSION)) {
  79. $dgenus = $_SESSION['tripal_feature_seq_extract']['genus'];
  80. $dspecies = $_SESSION['tripal_feature_seq_extract']['species'];
  81. $danalysis = $_SESSION['tripal_feature_seq_extract']['analysis'];
  82. $dftype = $_SESSION['tripal_feature_seq_extract']['ftype'];
  83. $dfnames = $_SESSION['tripal_feature_seq_extract']['fnames'];
  84. $dupstream = $_SESSION['tripal_feature_seq_extract']['upstream'];
  85. $ddownstream = $_SESSION['tripal_feature_seq_extract']['downstream'];
  86. $duse_parent = $_SESSION['tripal_feature_seq_extract']['use_parent'];
  87. $daggregate = $_SESSION['tripal_feature_seq_extract']['aggregate'];
  88. $dagg_types = $_SESSION['tripal_feature_seq_extract']['agg_types'];
  89. }
  90. // we want to allow the query string to provide values for the form
  91. if (array_key_exists('fnames', $_GET) and $_GET['fnames']) {
  92. $dfnames = $_GET['fnames'];
  93. }
  94. if (array_key_exists('genus', $_GET) and $_GET['genus']) {
  95. $dgenus = $_GET['genus'];
  96. }
  97. if (array_key_exists('species', $_GET) and $_GET['species']) {
  98. $dspecies = $_GET['species'];
  99. }
  100. if (array_key_exists('ftype', $_GET) and $_GET['ftype']) {
  101. $dftype = $_GET['ftype'];
  102. }
  103. if (array_key_exists('analysis', $_GET) and $_GET['analysis']) {
  104. $danalysis = $_GET['analysis'];
  105. }
  106. if (array_key_exists('upstream', $_GET) and $_GET['upstream']) {
  107. $dupstream = $_GET['upstream'];
  108. }
  109. if (array_key_exists('downstream', $_GET) and $_GET['downstream']) {
  110. $ddownstream = $_GET['downstream'];
  111. }
  112. if (array_key_exists('use_parent', $_GET) and $_GET['use_parent']) {
  113. $duse_parent = $_GET['use_parent'];
  114. }
  115. if (array_key_exists('aggregate', $_GET) and $_GET['aggregate']) {
  116. $daggregate = $_GET['aggregate'];
  117. }
  118. if (array_key_exists('agg_types', $_GET) and $_GET['agg_types']) {
  119. $dagg_types = $_GET['agg_types'];
  120. }
  121. // get defaults from the form state
  122. if (array_key_exists('values', $form_state)) {
  123. $dgenus = $form_state['values']['genus'];
  124. $dspecies = $form_state['values']['species'];
  125. $danalysis = $form_state['values']['analysis'];
  126. $dftype = $form_state['values']['ftype'];
  127. $dfnames = $form_state['values']['fnames'];
  128. $dupstream = $form_state['values']['upstream'];
  129. $ddownstream = $form_state['values']['downstream'];
  130. $dformat = $form_state['values']['format'];
  131. $duse_parent = $form_state['values']['use_parent'];
  132. $daggregate = $form_state['values']['aggregate'];
  133. $dagg_types = $form_state['values']['agg_types'];
  134. }
  135. // Because we're using Tripal's file_upload_combo form element we
  136. // need to allow the form to upload files
  137. $form['#attributes']['enctype'] = 'multipart/form-data';
  138. $form['#method'] = 'POST';
  139. $form['description'] = array(
  140. '#markup' => t('Use this form to retrieve sequences in FASTA format.')
  141. );
  142. $sql = "
  143. SELECT DISTINCT genus
  144. FROM {organism}
  145. ORDER BY genus
  146. ";
  147. $results = chado_query($sql);
  148. $genus = array();
  149. $genus[] = '';
  150. while ($organism = $results->fetchObject()) {
  151. $genus[$organism->genus] = $organism->genus;
  152. }
  153. $form['genus'] = array(
  154. '#title' => t('Genus'),
  155. '#type' => 'select',
  156. '#options' => $genus,
  157. '#default_value' => $dgenus,
  158. '#multiple' => FALSE,
  159. '#description' => t('The organism\'s genus. If specified, features for all organism with this genus will be retrieved.'),
  160. '#ajax' => array(
  161. 'callback' => 'tripal_chado_feature_seq_extract_form_ajax_callback',
  162. 'wrapper' => 'tripal-feature-seq-extract-form',
  163. 'event' => 'change',
  164. 'method' => 'replace',
  165. ),
  166. );
  167. $species = array();
  168. $species[] = '';
  169. if ($dgenus) {
  170. $sql = "
  171. SELECT DISTINCT species
  172. FROM {organism}
  173. WHERE genus = :genus
  174. ORDER BY species
  175. ";
  176. $results = chado_query($sql, array(':genus' => $dgenus));
  177. while ($organism = $results->fetchObject()) {
  178. $species[$organism->species] = $organism->species;
  179. }
  180. }
  181. $form['species'] = array(
  182. '#title' => t('Species'),
  183. '#type' => 'select',
  184. '#options' => $species,
  185. '#default_value' => $dspecies,
  186. '#multiple' => FALSE,
  187. '#description' => t('The organism\'s species name. If specified, features for all organisms with this species will be retrieved. Please first select a genus'),
  188. '#ajax' => array(
  189. 'callback' => 'tripal_chado_feature_seq_extract_form_ajax_callback',
  190. 'wrapper' => 'tripal-feature-seq-extract-form',
  191. 'event' => 'change',
  192. 'method' => 'replace',
  193. ),
  194. );
  195. $analyses = array();
  196. $analyses[] = '';
  197. if ($dgenus) {
  198. $sql = "
  199. SELECT DISTINCT A.analysis_id, A.name
  200. FROM {analysis_organism} AO
  201. INNER JOIN {analysis} A ON A.analysis_id = AO.analysis_id
  202. INNER JOIN {organism} O ON O.organism_id = AO.organism_id
  203. WHERE O.genus = :genus
  204. ";
  205. $args = array();
  206. $args[':genus'] = $dgenus;
  207. if ($dspecies) {
  208. $sql .= " AND O.species = :species ";
  209. $args[':species'] = $dspecies;
  210. }
  211. $sql .=" ORDER BY A.name ";
  212. $results = chado_query($sql, $args);
  213. while ($analysis = $results->fetchObject()) {
  214. $analyses[$analysis->name] = $analysis->name;
  215. }
  216. }
  217. $form['analysis'] = array(
  218. '#title' => t('Analyses'),
  219. '#type' => 'select',
  220. '#options' => $analyses,
  221. '#default_value' => $danalysis,
  222. '#multiple' => FALSE,
  223. '#description' => t('You can limit sequences by the analyses to which it was derived or was used. If specified, only features associated with the specific analysis will be retrieved.'),
  224. );
  225. $ftype = array();
  226. $ftype[] = '';
  227. if ($dgenus) {
  228. $sql = "
  229. SELECT DISTINCT OFC.cvterm_id, OFC.feature_type
  230. FROM {organism_feature_count} OFC
  231. WHERE OFC.genus = :genus
  232. ";
  233. $args = array();
  234. $args['genus'] = $dgenus;
  235. if ($dspecies) {
  236. $sql .= " AND OFC.species = :species";
  237. $args['species'] = $dspecies;
  238. }
  239. $sql .= " ORDER BY OFC.feature_type ";
  240. $results = chado_query($sql, $args);
  241. while ($type = $results->fetchObject()) {
  242. $ftype[$type->feature_type] = $type->feature_type;
  243. }
  244. }
  245. $form['ftype'] = array(
  246. '#title' => t('Feature Type'),
  247. '#type' => 'select',
  248. '#options' => $ftype,
  249. '#multiple' => FALSE,
  250. '#default_value' => $dftype,
  251. '#description' => t('The type of feature to retrieve (e.g. mRNA). All
  252. features that match this type will be retrieved.'),
  253. );
  254. $form['fnames'] = array(
  255. '#title' => t('Feature Name'),
  256. '#type' => 'file_upload_combo',
  257. '#default_value' => $dfnames,
  258. '#description' => t('The names of the features to retrieve. Separate each
  259. with a new line or comma. Leave blank to retrieve all features
  260. matching other criteria.'),
  261. '#rows' => 8
  262. );
  263. $form['upstream'] = array(
  264. '#title' => t('Upstream Bases'),
  265. '#type' => 'textfield',
  266. '#description' => t('A numeric value specifying the number of upstream
  267. bases to include. Only works if the feature is aligned to a larger
  268. sequence.'),
  269. '#default_value' => $dupstream,
  270. '#size' => 5,
  271. );
  272. $form['downstream'] = array(
  273. '#title' => t('Downstream Bases'),
  274. '#type' => 'textfield',
  275. '#description' => t('A numeric value specifying the number of downstream
  276. bases to incldue. Only works if the feature is aligned to a larger
  277. sequence.'),
  278. '#default_value' => $ddownstream,
  279. '#size' => 5,
  280. );
  281. $form['advanced'] = array(
  282. '#type' => 'fieldset',
  283. '#title' => 'Advanced',
  284. '#collapsible' => TRUE,
  285. '#collapsed' => TRUE
  286. );
  287. $form['advanced']['use_parent'] = array(
  288. '#title' => t('Use Parent'),
  289. '#type' => 'checkbox',
  290. '#default_value' => $duse_parent,
  291. '#description' => t('Check this box to retrieve the sequence from the
  292. parent in an alignment rather than the feature itself. This is useful
  293. if the same feature is aligned to multiple parents and you would like
  294. to retrieve the underlying sequence from each parent.'),
  295. );
  296. $form['advanced']['aggregate'] = array(
  297. '#title' => t('Aggregate'),
  298. '#type' => 'checkbox',
  299. '#default_value' => $daggregate,
  300. '#description' => t('Check this box to aggregate sub features into a
  301. single sequence. This is useful, for example, for obtaining CDS
  302. sequence from an mRNA. Rather than retrieve the mRNA sequence, the
  303. sub features of the mRNA will be aggregated and that will be returned.')
  304. );
  305. $form['advanced']['agg_types'] = array(
  306. '#title' => t('Types to aggregate'),
  307. '#type' => 'textarea',
  308. '#default_value' => $dagg_types,
  309. '#description' => t('Set this argument to the type of children to
  310. aggregate. This is useful in the case where a gene has exons, CDSs
  311. and UTRs. In this case, you may only want to aggregate CDSs and
  312. exclude exons. If you want to aggregate both CDSs and UTRs you
  313. could specify both. Please place each type on a new line.')
  314. );
  315. $form['retrieve_btn'] = array(
  316. '#type' => 'submit',
  317. '#name' => 'retrieve',
  318. '#value' => 'Retrieve Sequences',
  319. );
  320. if (user_access('administer tripal')) {
  321. $notice = tripal_set_message("Administrators, the " .
  322. l('organism_feature_count', 'admin/tripal/schema/mviews') . " and " .
  323. l('analysis_organism', 'admin/tripal/schema/mviews') . " materialized
  324. views must be populated before using this form. Those views should be re-populated
  325. when new data is added.", TRIPAL_NOTICE, array('return_html' => TRUE));
  326. }
  327. $form['#prefix'] = '<div id="tripal-feature-seq-extract-form">';
  328. $form['#suffix'] = $notice . '</div>';
  329. return $form;
  330. }
  331. /**
  332. * Theme the Form to choose which features to extract sequence for
  333. *
  334. * @ingroup tripal_chado_feature
  335. */
  336. function theme_tripal_chado_feature_seq_extract_form(&$variables) {
  337. $form = $variables['form'];
  338. $headers = array();
  339. $rows = array(
  340. 0 => array(
  341. array('data' => drupal_render($form['description']), 'colspan' => 3),
  342. ),
  343. 1 => array(
  344. drupal_render($form['genus']),
  345. drupal_render($form['species']) ,
  346. drupal_render($form['ftype']),
  347. ),
  348. 2 => array(
  349. array('data' => drupal_render($form['analysis']), 'colspan' => 3),
  350. //drupal_render($form['format']),
  351. ),
  352. 3 => array(
  353. array('data' => drupal_render($form['fnames']), 'colspan' => 2),
  354. drupal_render($form['upstream']) .
  355. drupal_render($form['downstream']) .
  356. drupal_render($form['format']),
  357. ),
  358. 4 => array(
  359. array(
  360. 'data' => drupal_render($form['advanced']),
  361. 'colspan' => 3,
  362. ),
  363. ),
  364. 5 => array(
  365. array(
  366. 'data' => drupal_render($form['retrieve_btn']) . drupal_render($form['reset_btn']),
  367. 'colspan' => 3,
  368. ),
  369. ),
  370. );
  371. $table_vars = array(
  372. 'header' => $headers,
  373. 'rows' => $rows,
  374. 'attributes' => array(
  375. 'id' => 'tripal-feature-seq-extract-form-table',
  376. 'border' => '0'
  377. ),
  378. 'sticky' => FALSE,
  379. 'colgroups' => array(),
  380. 'empty' => '',
  381. );
  382. $form['rendered_form'] = array(
  383. '#type' => 'item',
  384. '#markup' => theme('table', $table_vars),
  385. );
  386. return drupal_render_children($form);
  387. }
  388. /**
  389. * Ajax function which returns the form via ajax
  390. */
  391. function tripal_chado_feature_seq_extract_form_ajax_callback($form, &$form_state) {
  392. return $form;
  393. }
  394. /**
  395. * Validate the extract sequence form
  396. *
  397. * @ingroup tripal_chado_feature
  398. */
  399. function tripal_chado_feature_seq_extract_form_validate($form, &$form_state) {
  400. $genus = $form_state['values']['genus'];
  401. $species = $form_state['values']['species'];
  402. $analysis = $form_state['values']['analysis'];
  403. $ftype = $form_state['values']['ftype'];
  404. $fnames = $form_state['values']['fnames'];
  405. $upstream = $form_state['values']['upstream'];
  406. $downstream = $form_state['values']['downstream'];
  407. $use_parent = $form_state['values']['use_parent'];
  408. $aggregate = $form_state['values']['aggregate'];
  409. $agg_types = $form_state['values']['agg_types'];
  410. if ($upstream and !preg_match('/^\d+$/', $upstream)) {
  411. form_set_error('upstream', 'Please enter a positive numeric value for the upstream bases');
  412. }
  413. if ($downstream and !preg_match('/^\d+$/', $downstream)) {
  414. form_set_error('downstream', 'Please enter a positive numeric value for the downstream bases');
  415. }
  416. if (!$genus and !$species and !$ftype and !$fnames) {
  417. form_set_error('', 'Please provide a feature name, a feature type or a genus.');
  418. }
  419. if ($ftype == 'polypeptide' and $upstream) {
  420. form_set_error('upstream', 'When the sequence type is protein the upstream value must be unset.');
  421. }
  422. if ($ftype == 'polypeptide' and $downstream) {
  423. form_set_error('downstream', 'When the sequence type is protein the downstream value must be unset.');
  424. }
  425. if ($ftype == 'polypeptide' and $use_parent) {
  426. form_set_error('use_parent', 'When the sequence type is protein the "Use Parent" option must not be set.');
  427. }
  428. }
  429. /**
  430. * Submit the extract sequence form
  431. *
  432. * @ingroup tripal_chado_feature
  433. */
  434. function tripal_chado_feature_seq_extract_form_submit($form, &$form_state) {
  435. $genus = $form_state['values']['genus'];
  436. $species = $form_state['values']['species'];
  437. $analysis = $form_state['values']['analysis'];
  438. $ftype = $form_state['values']['ftype'];
  439. $fnames = $form_state['values']['fnames'];
  440. $upstream = $form_state['values']['upstream'];
  441. $downstream = $form_state['values']['downstream'];
  442. $use_parent = $form_state['values']['use_parent'];
  443. $aggregate = $form_state['values']['aggregate'];
  444. $agg_types = $form_state['values']['agg_types'];
  445. // we must use the parent sequence if the user has selected
  446. // the upstream, downstream or to aggregate
  447. if ($upstream or $downstream or $aggregate) {
  448. $use_parent = 1;
  449. }
  450. if ($form_state['clicked_button']['#name'] == 'retrieve') {
  451. $_SESSION['tripal_feature_seq_extract']['genus'] = $genus;
  452. $_SESSION['tripal_feature_seq_extract']['species'] = $species;
  453. $_SESSION['tripal_feature_seq_extract']['analysis'] = $analysis;
  454. $_SESSION['tripal_feature_seq_extract']['ftype'] = $ftype;
  455. $_SESSION['tripal_feature_seq_extract']['fnames'] = $fnames;
  456. $_SESSION['tripal_feature_seq_extract']['upstream'] = $upstream;
  457. $_SESSION['tripal_feature_seq_extract']['downstream'] = $downstream;
  458. $_SESSION['tripal_feature_seq_extract']['format'] = 'fasta_txt';
  459. $_SESSION['tripal_feature_seq_extract']['use_parent'] = $use_parent;
  460. $_SESSION['tripal_feature_seq_extract']['aggregate'] = $aggregate;
  461. $_SESSION['tripal_feature_seq_extract']['agg_types'] = $agg_types;
  462. $_SESSION['tripal_feature_seq_extract']['download'] = 1;
  463. drupal_goto('find/sequences/download');
  464. }
  465. }