tripal_views_handler_field_sequence.inc 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347
  1. <?php
  /**
   * @file
   * A chado wrapper for the views_handler_field.
   *
   * Handles display of sequence data. It will aggregate sequences that need
   * to be aggregated (e.g. coding sequences) and provides options for
   * formatting the output.
   */

  /**
   * Views field handler that renders the sequence (residues) of a feature.
   *
   * Depending on the 'display' options the sequence is either taken directly
   * from the field value delivered by the view, or derived from the parent
   * feature(s) the feature is aligned to (optionally restricted to the
   * feature's sub features, e.g. the CDS of an mRNA).
   */
  class tripal_views_handler_field_sequence extends chado_views_handler_field {

    /**
     * Initializes the handler and prepares the SQL statements used by render().
     *
     * Three statements are prepared: 'sequence_by_parent', 'sub_features' and
     * 'count_sub_features'. Preparation is skipped entirely when
     * 'sequence_by_parent' is already reported as prepared.
     */
    function init(&$view, $options) {
      parent::init($view, $options);

      // to speed things up we need to make sure we have a persistent connection
      tripal_db_persistent_chado();

      if (tripal_core_is_sql_prepared('sequence_by_parent')) {
        return;
      }

      // Prepare the queries we're going to use later during the render phase.
      // This SQL statement uses conditionals in the select clause to handle
      // cases where the alignment is in the reverse direction and where the
      // upstream and downstream extensions go beyond the length of the
      // parent sequence.
      //
      // Arguments: $1 = upstream bases, $2 = downstream bases,
      // $3 = feature_id of the aligned feature.
      //
      // When the extension on the low-coordinate side is clamped at the start
      // of the parent, the substring starts at position 1 and must only be
      // as long as the high-coordinate end (fmax plus the extension on that
      // side); otherwise the residues would run past the reported adjfmax.
      $psql = 'PREPARE sequence_by_parent (int, int, int) AS
        SELECT
          OF.name srcname, FL.srcfeature_id, FL.strand, OCVT.name as srctypename, SCVT.name as typename,
          FL.fmin, FL.fmax,
          CASE
            WHEN FL.strand >= 0 THEN
              CASE
                WHEN FL.fmin - $1 <= 0 THEN 0
                ELSE FL.fmin - $1
              END
            WHEN FL.strand < 0 THEN
              CASE
                WHEN FL.fmin - $2 <= 0 THEN 0
                ELSE FL.fmin - $2
              END
          END as adjfmin,
          CASE
            WHEN FL.strand >= 0 THEN
              CASE
                WHEN FL.fmax + $2 > OF.seqlen THEN OF.seqlen
                ELSE FL.fmax + $2
              END
            WHEN FL.strand < 0 THEN
              CASE
                WHEN FL.fmax + $1 > OF.seqlen THEN OF.seqlen
                ELSE FL.fmax + $1
              END
          END as adjfmax,
          CASE
            WHEN FL.strand >= 0 THEN
              CASE
                WHEN FL.fmin - $1 <= 0 THEN FL.fmin
                ELSE $1
              END
            ELSE
              CASE
                WHEN FL.fmax + $1 > OF.seqlen THEN OF.seqlen - FL.fmax
                ELSE $1
              END
          END as upstream,
          CASE
            WHEN FL.strand >= 0 THEN
              CASE
                WHEN FL.fmax + $2 > OF.seqlen THEN OF.seqlen - FL.fmax
                ELSE $2
              END
            ELSE
              CASE
                WHEN FL.fmin - $2 <= 0 THEN FL.fmin
                ELSE $2
              END
          END as downstream,
          CASE
            WHEN FL.strand >= 0 THEN
              CASE
                WHEN FL.fmin - $1 <= 0 THEN substring(OF.residues from 1 for (FL.fmax + $2))
                ELSE substring(OF.residues from (FL.fmin + 1 - $1) for ((FL.fmax - FL.fmin) + $1 + $2))
              END
            WHEN FL.strand < 0 THEN
              CASE
                WHEN FL.fmin - $2 <= 0 THEN substring(OF.residues from 1 for (FL.fmax + $1))
                ELSE substring(OF.residues from (FL.fmin + 1 - $2) for ((FL.fmax - FL.fmin) + $1 + $2))
              END
          END as residues
        FROM featureloc FL
          INNER JOIN feature SF on FL.feature_id = SF.feature_id
          INNER JOIN cvterm SCVT on SF.type_id = SCVT.cvterm_id
          INNER JOIN feature OF on FL.srcfeature_id = OF.feature_id
          INNER JOIN cvterm OCVT on OF.type_id = OCVT.cvterm_id
        WHERE SF.feature_id = $3';
      $this->prepare_statement('sequence_by_parent', $psql);

      // this query is meant to get all of the sub features of any given
      // feature (arg #1) and order them as they appear on the reference
      // feature (arg #2).
      $psql = 'PREPARE sub_features (int, int) AS
        SELECT SF.feature_id, CVT.name as type_name, SF.type_id
        FROM feature_relationship FR
          INNER JOIN feature SF on SF.feature_id = FR.subject_id
          INNER JOIN cvterm CVT on CVT.cvterm_id = SF.type_id
          INNER JOIN featureloc FL on FL.feature_id = FR.subject_id
          INNER JOIN feature PF on PF.feature_id = FL.srcfeature_id
        WHERE FR.object_id = $1 and PF.feature_id = $2
        ORDER BY FL.fmin ASC';
      $this->prepare_statement('sub_features', $psql);

      // Counts the rows that 'sub_features' returns for the same arguments.
      // render() no longer executes it (it counts the fetched rows instead),
      // but the statement is still prepared so that the set of statements
      // this handler registers is unchanged.
      $psql = 'PREPARE count_sub_features (int, int) AS
        SELECT count(*) as num_children
        FROM feature_relationship FR
          INNER JOIN feature SF on SF.feature_id = FR.subject_id
          INNER JOIN cvterm CVT on CVT.cvterm_id = SF.type_id
          INNER JOIN featureloc FL on FL.feature_id = FR.subject_id
          INNER JOIN feature PF on PF.feature_id = FL.srcfeature_id
        WHERE FR.object_id = $1 and PF.feature_id = $2';
      $this->prepare_statement('count_sub_features', $psql);
    }

    /**
     * Runs a PREPARE statement and logs an error if it fails.
     *
     * @param $name
     *   The name of the prepared statement (used in the log message only).
     * @param $psql
     *   The complete 'PREPARE ...' SQL text.
     *
     * @return
     *   Whatever chado_query() returned for the statement.
     */
    private function prepare_statement($name, $psql) {
      $status = chado_query($psql);
      if (!$status) {
        // the fourth argument of watchdog() is a severity constant, not a string
        watchdog('tripal_views_handler_field_sequence',
          "init: not able to prepare SQL statement '%name'",
          array('%name' => $name), WATCHDOG_ERROR);
      }
      return $status;
    }

    /**
     * Returns one of this handler's 'display' options.
     *
     * @param $name
     *   The key inside $this->options['display'].
     * @param $default
     *   The value to return when the option has never been saved.
     *
     * @return
     *   The stored option value, or $default if it is not set.
     */
    private function display_option($name, $default = NULL) {
      if (isset($this->options['display'][$name])) {
        return $this->options['display'][$name];
      }
      return $default;
    }

    /**
     * Defines the options form (form available to admin when they add a field to a view)
     */
    function options_form(&$form, &$form_state) {
      parent::options_form($form, $form_state);

      $form['display'] = array(
        '#type' => 'fieldset',
        '#title' => t('Format Output'),
        '#description' => t('Alter the way a sequence is displayed')
      );
      $form['display']['num_bases_per_line'] = array(
        '#type' => 'textfield',
        '#title' => t('Number of bases per line'),
        '#description' => t('Specify the number of bases per line. An HTML <br> tag ' .
          'will be inserted after the number of bases indicated. If no value is ' .
          'provided. The sequence will be one long string (default)'),
        '#default_value' => $this->display_option('num_bases_per_line'),
      );
      $form['display']['derive_from_parent'] = array(
        '#type' => 'checkbox',
        '#title' => t('Derive sequence from parent'),
        '#description' => t('Rather than use the sequence from the \'residues\' of this feature, you may ' .
          'derive the sequence from the parent features to which it is aligned. This is useful in the case that the feature ' .
          'does not have sequence associated with it and we need to get it through it\'s alignment. ' .
          'Note: this will slow queries with large numbers of results on the page.'),
        '#default_value' => $this->display_option('derive_from_parent'),
      );
      $form['display']['aggregate'] = array(
        '#type' => 'checkbox',
        '#title' => t('Aggregate sub features'),
        '#description' => t('If the feature has sub features (e.g. CDS of an mRNA) then check this '.
          'box to filter the sequence to only include the sub features. Gaps between sub features will be '.
          'excluded from the sequence. This is useful for obtaining a complete CDS from an mRNA '.
          'without intronic sequence'),
        '#default_value' => $this->display_option('aggregate'),
      );
    }

    /**
     * We need to add a few fields to our query
     */
    function query() {
      parent::query();

      // if we are going to get the sequence from the parent then
      // we will need to do more queries in the render function
      // and we must have the feature_id (and name) to do those
      if ($this->display_option('derive_from_parent')) {
        $this->ensure_my_table();
        $this->query->add_field($this->table, 'feature_id');
        $this->query->add_field($this->table, 'name');
      }
    }

    /**
     * Prior to display of results we want to format the sequence
     *
     * @param $values
     *   The result row object handed over by the view.
     *
     * @return
     *   The sequence as an HTML string. When a number of bases per line is
     *   configured the sequence is broken with <br> tags and wrapped in a
     *   monospace <span>; otherwise it is returned as one long string.
     */
    function render($values) {
      // A missing / non-numeric / non-positive value means "do not wrap".
      $num_bases_per_line = (int) $this->display_option('num_bases_per_line', 0);

      if ($this->display_option('derive_from_parent')) {
        $residues = $this->render_from_parents($values, $num_bases_per_line);
      }
      // if we are not getting the sequence from the parent sequence then
      // use what comes through from the feature record
      else {
        $field = $this->field_alias;
        $residues = isset($values->$field) ? $values->$field : '';
        $residues = $this->wrap_sequence($residues, $num_bases_per_line);
      }

      // format the residues for display
      if ($residues && $num_bases_per_line > 0) {
        $residues = '<span style="font-family: monospace;">' . $residues . '</span>';
      }
      return $residues;
    }

    /**
     * Inserts a <br> tag after every $num_bases_per_line characters.
     *
     * wordwrap() cannot force-cut with a zero width, so the sequence is
     * returned untouched when no positive width is configured.
     *
     * @param $seq
     *   The sequence string.
     * @param $num_bases_per_line
     *   Integer line width; values <= 0 disable wrapping.
     *
     * @return
     *   The (possibly wrapped) sequence string.
     */
    private function wrap_sequence($seq, $num_bases_per_line) {
      if ($num_bases_per_line > 0 && $seq) {
        return wordwrap($seq, $num_bases_per_line, "<br>", TRUE);
      }
      return $seq;
    }

    /**
     * Builds the output for a feature from the parent(s) it is aligned to.
     *
     * One header line plus sequence is produced for every row returned by
     * the 'sequence_by_parent' statement.
     *
     * @param $values
     *   The result row object; 'feature_feature_id' and 'feature_name' are
     *   read from it (these are the fields query() adds).
     * @param $num_bases_per_line
     *   Integer line width; values <= 0 disable wrapping.
     *
     * @return
     *   The HTML string for all parents, or '' if nothing could be derived.
     */
    private function render_from_parents($values, $num_bases_per_line) {
      $residues = '';

      // without the prepared statement or the feature id there is nothing to do
      if (!isset($values->feature_feature_id) || !tripal_core_is_sql_prepared('sequence_by_parent')) {
        return $residues;
      }
      $feature_id = $values->feature_feature_id;
      $feature_name = isset($values->feature_name) ? $values->feature_name : '';

      // the upstream and downstream values get set by the
      // tripal_views_handlers_filter_sequence.inc
      $sequence_q = isset($this->view->sequence_q) ? $this->view->sequence_q : array();
      $upstream = empty($sequence_q['upstream']) ? 0 : $sequence_q['upstream'];
      $downstream = empty($sequence_q['downstream']) ? 0 : $sequence_q['downstream'];

      // execute our prepared statement
      $sql = "EXECUTE sequence_by_parent (%d, %d, %d)";
      $parents = chado_query($sql, $upstream, $downstream, $feature_id);
      if (!$parents) {
        return $residues;
      }

      while ($parent = db_fetch_object($parents)) {
        // the sub feature types that make up an aggregated sequence; stays
        // empty when the parent residues are used as they are
        $types = array();

        // if we are to aggregate then we will ignore the residues returned
        // by the query above and rebuild them using the sub features
        if ($this->display_option('aggregate')) {
          $seq = $this->aggregate_sub_features($feature_id, $parent, $upstream, $downstream, $types);
        }
        // if this isn't an aggregate then use the parent residues
        else {
          $seq = $parent->residues;
        }

        // get the reverse complement if feature is on the reverse strand
        $dir = 'forward';
        if ($parent->strand < 0) {
          // NOTE(review): the helper name is spelled 'trpial_' here; it is
          // defined outside this file, so confirm the spelling against its
          // definition before renaming.
          $seq = trpial_feature_reverse_complement($seq);
          $dir = 'reverse';
        }

        // now format for display
        $seq = $this->wrap_sequence($seq, $num_bases_per_line);
        $residues .= ">$feature_name ($parent->typename) $parent->srcname:" . ($parent->adjfmin + 1) . ".." . $parent->adjfmax ." ($dir). ";
        if (count($types) > 0) {
          $residues .= "Excludes all bases but those of type(s): " . implode(', ', $types) . ". " ;
        }
        if ($parent->upstream > 0) {
          $residues .= "Includes " . $parent->upstream . " bases upstream. ";
        }
        if ($parent->downstream > 0) {
          $residues .= "Includes " . $parent->downstream . " bases downstream. ";
        }
        if (!$seq) {
          $residues .= "<br>\nNo sequence available\n<br>";
        }
        else {
          $residues .= "<br>\n" . $seq . "\n<br>";
        }
      }

      return $residues;
    }

    /**
     * Concatenates the sequences of a feature's sub features on one parent.
     *
     * The sub features come back ordered by fmin. Only the two outermost sub
     * features receive flanking bases; which flank goes to which end depends
     * on the strand of the alignment. A single sub feature is both the first
     * and the last one and therefore receives both flanks.
     *
     * @param $feature_id
     *   The feature whose sub features are aggregated.
     * @param $parent
     *   A row of 'sequence_by_parent'; srcfeature_id and strand are read.
     * @param $upstream
     *   Number of upstream bases requested.
     * @param $downstream
     *   Number of downstream bases requested.
     * @param $types
     *   Output: set to the distinct type names of the sub features found.
     *
     * @return
     *   The concatenated sequence (not yet reverse complemented).
     */
    private function aggregate_sub_features($feature_id, $parent, $upstream, $downstream, &$types) {
      $types = array();
      $seq = '';

      // now get the sub features that are located on the parent.
      $sql = "EXECUTE sub_features (%d, %d)";
      $result = chado_query($sql, $feature_id, $parent->srcfeature_id);
      if (!$result) {
        return $seq;
      }
      $children = array();
      while ($child = db_fetch_object($result)) {
        $children[] = $child;
      }

      $last = count($children) - 1;
      $is_forward = ($parent->strand >= 0);
      $sql = "EXECUTE sequence_by_parent (%d, %d, %d)";

      // iterate through the sub features and concat their sequences. They
      // should already be in order.
      foreach ($children as $i => $child) {
        // keep up with the types
        if (!in_array($child->type_name, $types)) {
          $types[] = $child->type_name;
        }

        $is_first = ($i == 0);
        $is_last = ($i == $last);
        if ($is_forward) {
          // -------------------------- ref
          // ....---->  ---->....
          //  up   1      2  down
          $up = $is_first ? $upstream : 0;
          $down = $is_last ? $downstream : 0;
        }
        else {
          // -------------------------- ref
          // ....<----  <----....
          // down  1      2   up
          $up = $is_last ? $upstream : 0;
          $down = $is_first ? $downstream : 0;
        }

        $q = chado_query($sql, $up, $down, $child->feature_id);
        if (!$q) {
          continue;
        }
        while ($subseq = db_fetch_object($q)) {
          // concatenate the sequences of all the sub features, but only
          // use the alignment on the parent we are currently working on
          if ($subseq->srcfeature_id == $parent->srcfeature_id) {
            $seq .= $subseq->residues;
          }
        }
      }

      return $seq;
    }
  }