seq_extract.inc 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. <?php
  /**
   * Page callback for the sequence extraction tool.
   *
   * When the session carries a pending 'download' flag, the stored search
   * criteria are handed to tripal_feature_seq_extract_get_features(), which
   * prints the sequences directly, and nothing is returned. Otherwise the
   * search form is rendered, preceded by a notice for users with the
   * 'access administration pages' permission.
   *
   * @return
   *   The rendered page HTML, or NULL when sequences were printed instead.
   */
  function tripal_feature_seq_extract_page() {
    // !empty() rather than a bare read: the session key does not exist until
    // the form has been submitted at least once.
    if (!empty($_SESSION['tripal_feature_seq_extract']['download'])) {
      $genus      = $_SESSION['tripal_feature_seq_extract']['genus'];
      $species    = $_SESSION['tripal_feature_seq_extract']['species'];
      $analysis   = $_SESSION['tripal_feature_seq_extract']['analysis'];
      $ftype      = $_SESSION['tripal_feature_seq_extract']['ftype'];
      $fname      = $_SESSION['tripal_feature_seq_extract']['fname'];
      $upstream   = $_SESSION['tripal_feature_seq_extract']['upstream'];
      $downstream = $_SESSION['tripal_feature_seq_extract']['downstream'];
      $format     = $_SESSION['tripal_feature_seq_extract']['format'];
      $use_parent = $_SESSION['tripal_feature_seq_extract']['use_parent'];
      $aggregate  = $_SESSION['tripal_feature_seq_extract']['aggregate'];
      $agg_types  = $_SESSION['tripal_feature_seq_extract']['agg_types'];

      // Clear the flag so that the next visit shows the form again.
      unset($_SESSION['tripal_feature_seq_extract']['download']);

      if ($format == 'fasta_html') {
        drupal_set_header('Content-Type: text/html');
      }
      else {
        // 'text' on its own is not a valid media type; plain FASTA is
        // text/plain.
        drupal_set_header('Content-Type: text/plain');
        drupal_set_header('Content-Disposition: attachment; filename="sequences.fna"');
      }

      tripal_feature_seq_extract_get_features(NULL, $genus, $species, $analysis,
        $ftype, $fname, $upstream, $downstream, $format, $use_parent, $aggregate, $agg_types);
      return;
    }

    // generate the search form
    $output = '';
    if (user_access('access administration pages')) {
      $output .= "
        <div class=\"tripal-no-results\">
          <p>Administrators, the " . l('organism_feature_count', 'admin/tripal/mviews') . " and
          " . l('analysis_organism', 'admin/tripal/mviews') . " materialized
          views must be populated before using this form. Those views should be re-populated
          when new data is added.</p>
        </div>
      ";
    }
    $output .= drupal_get_form('tripal_feature_seq_extract_form');
    return $output;
  }
  /**
   * Theme function for the sequence extraction form.
   *
   * Renders the search criteria into a three-column table with the id
   * 'tripal-feature-seq-extract-form-table', stores that table in the form
   * as a 'criteria' markup element, then renders the form itself.
   *
   * @param $form
   *   The form array to render.
   *
   * @return
   *   The rendered form HTML.
   */
  function theme_tripal_feature_seq_extract_form($form) {
    $rows = array();

    // Row 1: organism and feature type selectors.
    $rows[] = array(
      drupal_render($form['genus']),
      drupal_render($form['species']),
      drupal_render($form['ftype']),
    );

    // Row 2: the source (analysis) selector across the full width.
    $rows[] = array(
      array(
        'data' => drupal_render($form['analysis']),
        'colspan' => 3,
      ),
    );

    // Row 3: feature names, with the flanking-base fields sharing one cell.
    $rows[] = array(
      array(
        'data' => drupal_render($form['fname']),
        'colspan' => 2,
      ),
      drupal_render($form['upstream']) . drupal_render($form['downstream']),
    );

    // Rows 4 and 5: the advanced fieldset and the submit button.
    foreach (array('advanced', 'retrieve_btn') as $element) {
      $rows[] = array(
        array(
          'data' => drupal_render($form[$element]),
          'colspan' => 3,
        ),
      );
    }

    $attributes = array(
      'id' => 'tripal-feature-seq-extract-form-table',
      'border' => '0',
    );
    $form['criteria'] = array(
      '#type' => 'markup',
      '#value' => '' . theme('table', array(), $rows, $attributes),
      '#weight' => -10,
    );

    return drupal_render($form);
  }
  /**
   * Form builder for the sequence extraction form.
   *
   * Default values come from the submitted form values when present and
   * otherwise from the criteria remembered in the session. The genus list is
   * always loaded; the species, source and feature type lists are only
   * loaded once a genus is known.
   *
   * @param $form_state
   *   The current form state.
   *
   * @return
   *   The form array.
   */
  function tripal_feature_seq_extract_form(&$form_state = NULL) {
    tripal_core_ahah_init_form();

    // Resolve the defaults: submitted value first, then the session value,
    // then NULL. The isset() checks avoid undefined-index notices on a first
    // visit, when nothing has been stored in the session yet.
    $remembered = array();
    if (isset($_SESSION['tripal_feature_seq_extract'])) {
      $remembered = $_SESSION['tripal_feature_seq_extract'];
    }
    $fields = array('genus', 'species', 'analysis', 'ftype', 'fname', 'upstream',
      'downstream', 'use_parent', 'aggregate', 'agg_types');
    $defaults = array();
    foreach ($fields as $field) {
      if (isset($form_state['values'][$field])) {
        $defaults[$field] = $form_state['values'][$field];
      }
      else {
        $defaults[$field] = isset($remembered[$field]) ? $remembered[$field] : NULL;
      }
    }
    // The output format is not restored from the session.
    $dformat = isset($form_state['values']['format']) ? $form_state['values']['format'] : 'fasta_txt';
    $dgenus = $defaults['genus'];
    $dspecies = $defaults['species'];

    $form = array();

    // Genus: every distinct genus in the organism table.
    $sql = "
      SELECT DISTINCT genus
      FROM {organism}
      ORDER BY genus
    ";
    $results = chado_query($sql);
    $genus = array();
    $genus[] = '';
    while ($organism = db_fetch_object($results)) {
      $genus[$organism->genus] = $organism->genus;
    }
    $form['genus'] = array(
      '#title' => t('Genus'),
      '#type' => 'select',
      '#options' => $genus,
      '#default_value' => $dgenus,
      '#multiple' => FALSE,
      '#description' => t('The organism\'s genus. If specified, features for all organisms with this genus will be retrieved.'),
      '#ahah' => array(
        'path' => 'seq_extract/set_genus',
        'wrapper' => 'tripal-feature-seq-extract-form-table',
        'event' => 'change',
        'method' => 'replace',
      ),
    );

    // Species: limited to the selected genus.
    $species = array();
    $species[] = '';
    if ($dgenus) {
      $sql = "
        SELECT DISTINCT species
        FROM {organism}
        WHERE genus = '%s'
        ORDER BY species
      ";
      $results = chado_query($sql, $dgenus);
      while ($organism = db_fetch_object($results)) {
        $species[$organism->species] = $organism->species;
      }
    }
    $form['species'] = array(
      '#title' => t('Species'),
      '#type' => 'select',
      '#options' => $species,
      '#default_value' => $dspecies,
      '#multiple' => FALSE,
      '#description' => t('The organism\'s species name. If specified, features for all organisms with this species will be retrieved. Please first select a genus'),
      '#ahah' => array(
        'path' => 'seq_extract/set_species',
        'wrapper' => 'tripal-feature-seq-extract-form-table',
        'event' => 'change',
        'method' => 'replace',
      ),
    );

    // Source: analyses linked to the selected genus (and species, if any).
    // Options are keyed by analysis name, not analysis_id.
    $analyses = array();
    $analyses[] = '';
    if ($dgenus) {
      $sql = "
        SELECT DISTINCT A.analysis_id, A.name
        FROM {analysis_organism} AO
          INNER JOIN {analysis} A ON A.analysis_id = AO.analysis_id
          INNER JOIN {organism} O ON O.organism_id = AO.organism_id
        WHERE O.genus = '%s'
      ";
      $args = array();
      $args[] = $dgenus;
      if ($dspecies) {
        $sql .= " AND O.species = '%s' ";
        $args[] = $dspecies;
      }
      $sql .= " ORDER BY A.name ";
      $results = chado_query($sql, $args);
      while ($analysis = db_fetch_object($results)) {
        $analyses[$analysis->name] = $analysis->name;
      }
    }
    $form['analysis'] = array(
      '#title' => t('Source'),
      '#type' => 'select',
      '#options' => $analyses,
      '#default_value' => $defaults['analysis'],
      '#multiple' => FALSE,
      '#description' => t('The feature source. If specified, only features derived or part of the selected source will be retrieved.'),
      '#ahah' => array(
        'path' => 'seq_extract/set_source',
        'wrapper' => 'tripal-feature-seq-extract-form-table',
        'event' => 'change',
        'method' => 'replace',
      ),
    );

    // Feature type: types listed in organism_feature_count for the selected
    // genus (and species, if any).
    $ftype = array();
    $ftype[] = '';
    if ($dgenus) {
      $sql = "
        SELECT DISTINCT OFC.cvterm_id, OFC.feature_type
        FROM {organism_feature_count} OFC
        WHERE OFC.genus = '%s'
      ";
      $args = array();
      $args[] = $dgenus;
      if ($dspecies) {
        $sql .= " AND OFC.species = '%s'";
        $args[] = $dspecies;
      }
      $sql .= " ORDER BY OFC.feature_type ";
      $results = chado_query($sql, $args);
      while ($type = db_fetch_object($results)) {
        $ftype[$type->feature_type] = $type->feature_type;
      }
    }
    $form['ftype'] = array(
      '#title' => t('Feature Type'),
      '#type' => 'select',
      '#options' => $ftype,
      '#multiple' => FALSE,
      '#default_value' => $defaults['ftype'],
      '#description' => t('The type of feature to retrieve (e.g. mRNA). All features that match this type will be retrieved.'),
    );

    $form['fname'] = array(
      '#title' => t('Feature Name'),
      '#type' => 'textarea',
      '#default_value' => $defaults['fname'],
      '#description' => t('The names of the features to retrieve. Separate each with a space. Leave blank to retrieve all features matching other criteria.'),
      '#rows' => 8
    );
    $form['upstream'] = array(
      '#title' => t('Upstream Bases'),
      '#type' => 'textfield',
      '#description' => t('A numeric value specifying the number of upstream bases to include.'),
      '#default_value' => $defaults['upstream'],
      '#size' => 5,
    );
    $form['downstream'] = array(
      '#title' => t('Downstream Bases'),
      '#type' => 'textfield',
      '#description' => t('A numeric value specifying the number of downstream bases to include.'),
      '#default_value' => $defaults['downstream'],
      '#size' => 5,
    );
    // The format is carried as a hidden value; the theme function has its
    // rendering in the table commented out.
    $form['format'] = array(
      '#title' => t('Output Format'),
      '#type' => 'hidden',
      '#default_value' => $dformat,
      '#options' => array(
        'fasta_html' => 'FASTA (in browser)',
        'fasta_txt' => 'FASTA (download)',
      ),
    );

    $form['advanced'] = array(
      '#type' => 'fieldset',
      '#title' => 'Advanced',
      '#collapsible' => TRUE,
      '#collapsed' => TRUE
    );
    $form['advanced']['use_parent'] = array(
      '#title' => t('Use Parent'),
      '#type' => 'checkbox',
      '#default_value' => $defaults['use_parent'],
      '#description' => t('Check this box to retrieve the sequence from the parent in an alignment rather than the feature itself. This is useful if the same feature is aligned to multiple parents and you would like to retrieve the underlying sequence from each parent.'),
    );
    $form['advanced']['aggregate'] = array(
      '#title' => t('Aggregate'),
      '#type' => 'checkbox',
      '#default_value' => $defaults['aggregate'],
      '#description' => t('Check this box to aggregate sub features into a single sequence. This is useful, for example, for obtaining CDS sequence from an mRNA. Rather than retrieve the mRNA sequence, the sub features of the mRNA will be aggregated and that will be returned.')
    );
    $form['advanced']['agg_types'] = array(
      '#title' => t('Types to aggregate'),
      '#type' => 'textarea',
      '#default_value' => $defaults['agg_types'],
      '#description' => t('Set this argument to the type of children to aggregate. This is useful in the case where a gene has exons, CDSs and UTRs. In this case, you may only want to aggregate CDSs and exclude exons. If you want to aggregate both CDSs and UTRs you could specify both.')
    );

    $form['retrieve_btn'] = array(
      '#type' => 'submit',
      '#value' => 'Retrieve'
    );
    return $form;
  }
  /**
   * AHAH callback for the path 'seq_extract/set_genus'.
   *
   * Re-renders the sequence extraction form and sends it back as JSON,
   * together with the settings returned by tripal_core_ahah_bind_events().
   */
  function tripal_feature_seq_extract_set_genus() {
    // Prepare the form and run it through its theme function.
    $prepared_form = tripal_core_ahah_prepare_form();
    $html = theme('tripal_feature_seq_extract_form', $prepared_form);

    // Bind javascript events to the returned elements so that the AHAH
    // enabled ones keep working.
    $js_settings = tripal_core_ahah_bind_events();

    // Send the updated JSON.
    $response = array(
      'status' => TRUE,
      'data' => $html,
      'settings' => $js_settings,
    );
    drupal_json($response);
  }
  /**
   * AHAH callback for the path 'seq_extract/set_species'.
   *
   * Re-renders the sequence extraction form and sends it back as JSON,
   * together with the settings returned by tripal_core_ahah_bind_events().
   */
  function tripal_feature_seq_extract_set_species() {
    // Prepare the form and run it through its theme function.
    $prepared_form = tripal_core_ahah_prepare_form();
    $html = theme('tripal_feature_seq_extract_form', $prepared_form);

    // Bind javascript events to the returned elements so that the AHAH
    // enabled ones keep working.
    $js_settings = tripal_core_ahah_bind_events();

    // Send the updated JSON.
    $response = array(
      'status' => TRUE,
      'data' => $html,
      'settings' => $js_settings,
    );
    drupal_json($response);
  }
  /**
   * AHAH callback for the path 'seq_extract/set_source'.
   *
   * Re-renders the sequence extraction form and sends it back as JSON,
   * together with the settings returned by tripal_core_ahah_bind_events().
   */
  function tripal_feature_seq_extract_set_source() {
    // Prepare the form and run it through its theme function.
    $prepared_form = tripal_core_ahah_prepare_form();
    $html = theme('tripal_feature_seq_extract_form', $prepared_form);

    // Bind javascript events to the returned elements so that the AHAH
    // enabled ones keep working.
    $js_settings = tripal_core_ahah_bind_events();

    // Send the updated JSON.
    $response = array(
      'status' => TRUE,
      'data' => $html,
      'settings' => $js_settings,
    );
    drupal_json($response);
  }
  /**
   * Validation handler for the sequence extraction form.
   *
   * Flags an error when the upstream or downstream value is non-empty and
   * not made up of digits only, and when none of genus, species, feature
   * type and feature name was supplied.
   *
   * @param $form
   *   The form array.
   * @param $form_state
   *   The form state holding the submitted values.
   */
  function tripal_feature_seq_extract_form_validate($form, &$form_state) {
    $genus      = $form_state['values']['genus'];
    $species    = $form_state['values']['species'];
    $ftype      = $form_state['values']['ftype'];
    $fname      = $form_state['values']['fname'];
    $upstream   = $form_state['values']['upstream'];
    $downstream = $form_state['values']['downstream'];

    // \z anchors at the very end of the string; a plain $ would also accept
    // a value followed by a trailing newline.
    if ($upstream and !preg_match('/^\d+\z/', $upstream)) {
      form_set_error('upstream', 'Please enter a positive numeric value for the upstream bases');
    }
    if ($downstream and !preg_match('/^\d+\z/', $downstream)) {
      form_set_error('downstream', 'Please enter a positive numeric value for the downstream bases');
    }
    if (!$genus and !$species and !$ftype and !$fname) {
      form_set_error('', 'Please provide a feature name, a feature type or a genus.');
    }
  }
  /**
   * Submit handler for the sequence extraction form.
   *
   * Copies the submitted criteria into the session under the
   * 'tripal_feature_seq_extract' key and sets the 'download' flag that the
   * page callback checks.
   *
   * @param $form
   *   The form array.
   * @param $form_state
   *   The form state holding the submitted values.
   */
  function tripal_feature_seq_extract_form_submit($form, &$form_state) {
    $criteria = array();
    $fields = array('genus', 'species', 'analysis', 'ftype', 'fname', 'upstream',
      'downstream', 'format', 'use_parent', 'aggregate', 'agg_types');
    foreach ($fields as $field) {
      $criteria[$field] = $form_state['values'][$field];
    }

    // we must use the parent sequence if the user has selected
    // the upstream, downstream or to aggregate
    if ($criteria['upstream'] or $criteria['downstream'] or $criteria['aggregate']) {
      $criteria['use_parent'] = 1;
    }

    foreach ($criteria as $field => $value) {
      $_SESSION['tripal_feature_seq_extract'][$field] = $value;
    }
    $_SESSION['tripal_feature_seq_extract']['download'] = 1;
  }
  /**
   * Prints the formatted sequence of every feature matching the criteria.
   *
   * Builds one query over feature, organism and cvterm (plus analysisfeature
   * and analysis when a source is given), then prints the result of
   * tripal_feature_get_formatted_sequence() for each matching feature. If
   * no type, feature name or genus is supplied, a message is printed and
   * nothing is queried.
   *
   * @param $org_commonname
   *   Organism common name to match, or empty to skip this filter.
   * @param $genus
   *   Organism genus to match, or empty to skip this filter.
   * @param $species
   *   Organism species to match, or empty to skip this filter.
   * @param $analysis_name
   *   Analysis name to match, or empty to skip this filter.
   * @param $type
   *   Feature type (cvterm name) to match, or empty to skip this filter.
   * @param $feature_name
   *   Feature names to match: a whitespace-separated string, or an array of
   *   names. Empty to skip this filter.
   * @param $upstream
   *   Number of upstream bases, passed through to the formatter.
   * @param $downstream
   *   Number of downstream bases, passed through to the formatter.
   * @param $output_format
   *   Output format passed through to the formatter; 'fasta_txt' when empty.
   * @param $derive_from_parent
   *   Passed through to the formatter.
   * @param $aggregate
   *   Passed through to the formatter.
   * @param $child
   *   Comma-separated list of sub feature types, passed to the formatter as
   *   an array.
   */
  function tripal_feature_seq_extract_get_features($org_commonname, $genus, $species, $analysis_name,
    $type, $feature_name, $upstream, $downstream, $output_format, $derive_from_parent, $aggregate, $child) {

    // Split the sub feature types, trimming each entry and dropping empty
    // ones. A bare explode() turns an empty string into array(''), i.e. a
    // one-element list holding an empty type name.
    // NOTE(review): assumes tripal_feature_get_formatted_sequence() treats
    // an empty array as "no restriction" -- confirm against that function.
    $sub_features = array();
    foreach (explode(',', (string) $child) as $sub_type) {
      $sub_type = trim($sub_type);
      if ($sub_type !== '') {
        $sub_features[] = $sub_type;
      }
    }

    if (!$output_format) {
      $output_format = 'fasta_txt';
    }

    // The search form asks for feature names separated by a space, so the
    // value is split into individual names rather than compared as a whole.
    if (is_array($feature_name)) {
      $names = array_values($feature_name);
    }
    else {
      $names = preg_split('/\s+/', (string) $feature_name, -1, PREG_SPLIT_NO_EMPTY);
    }

    if (!$type and !$names and !$genus) {
      print "Please provide a type, feature name or genus\n";
      return;
    }

    // get the list of features
    $vars = array();
    $sql = "SELECT DISTINCT F.feature_id, F.name, F.uniquename, O.genus, O.species, CVT.name as feature_type " .
           "FROM {feature} F " .
           "  INNER JOIN {organism} O on O.organism_id = F.organism_id " .
           "  INNER JOIN {cvterm} CVT on CVT.cvterm_id = F.type_id ";
    if ($analysis_name) {
      $sql .= "  INNER JOIN {analysisfeature} AF on AF.feature_id = F.feature_id " .
              "  INNER JOIN {analysis} A on AF.analysis_id = A.analysis_id ";
    }
    $sql .= "WHERE (1=1) ";
    if ($org_commonname) {
      $sql .= "AND O.common_name = '%s' ";
      $vars[] = $org_commonname;
    }
    if ($genus) {
      $sql .= "AND O.genus = '%s' ";
      $vars[] = $genus;
    }
    if ($species) {
      $sql .= "AND O.species = '%s' ";
      $vars[] = $species;
    }
    if ($type) {
      $sql .= "AND CVT.name = '%s' ";
      $vars[] = $type;
    }
    if ($names) {
      // One quoted placeholder per name.
      $placeholders = implode(', ', array_fill(0, count($names), "'%s'"));
      $sql .= "AND F.name IN ($placeholders) ";
      foreach ($names as $name) {
        $vars[] = $name;
      }
    }
    if ($analysis_name) {
      $sql .= "AND A.name = '%s' ";
      $vars[] = $analysis_name;
    }

    $num_bases_per_line = 50;
    $q = chado_query($sql, $vars);
    while ($feature = db_fetch_object($q)) {
      // Label passed to the formatter for this feature.
      $label = "$feature->uniquename $feature->name $feature->feature_type ($feature->genus $feature->species)";
      $sequence = tripal_feature_get_formatted_sequence($feature->feature_id, $label,
        $num_bases_per_line, $derive_from_parent, $aggregate, $output_format,
        $upstream, $downstream, $sub_features);
      print $sequence;
    }
  }