tripal_views_handler_field_sequence.inc 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341
  1. <?php
  2. /**
  3. * @file
  4. * A chado wrapper for the views_handler_field.
  5. *
  6. * Handles display of sequence data. It will aggregate sequences that need
  7. * to be aggregated (e.g. coding sequences) and provide formatting options.
  8. */
  /**
   * Views field handler that renders the sequence (residues) of a chado feature.
   *
   * The sequence is either taken directly from the field value or, when the
   * "derive from parent" option is set, cut out of the feature(s) on which the
   * feature is located. Optionally only the sub features (e.g. the CDS of an
   * mRNA) are concatenated, which drops the gaps between them.
   */
  class tripal_views_handler_field_sequence extends chado_views_handler_field {

    /**
     * Result alias of the feature_id column added in query().
     *
     * Defaults to the alias that used to be hard-coded in render().
     */
    protected $feature_id_alias = 'feature_feature_id';

    /**
     * Initializes the handler and prepares the SQL statements used by render().
     *
     * @param $view
     *   The view this handler belongs to.
     * @param $options
     *   The handler options as passed in by Views.
     */
    function init(&$view, $options) {
      parent::init($view, $options);

      // To speed things up we need to make sure we have a persistent connection.
      tripal_db_persistent_chado();

      // Each statement is checked on its own so that a statement whose PREPARE
      // failed earlier is retried instead of being skipped forever.
      foreach ($this->prepared_statements() as $name => $psql) {
        if (tripal_core_is_sql_prepared($name)) {
          continue;
        }
        if (!chado_query($psql)) {
          watchdog('tripal_views_handler_field_sequence',
            "init: not able to prepare SQL statement '%name'",
            array('%name' => $name), WATCHDOG_ERROR);
        }
      }
    }

    /**
     * Returns the PREPARE statements used by this handler, keyed by name.
     *
     * @return
     *   An array of SQL strings keyed by the prepared statement name.
     */
    protected function prepared_statements() {
      $statements = array();

      // Arguments: $1 = upstream bases, $2 = downstream bases, $3 = feature_id.
      // Upstream/downstream are relative to the feature, so on the reverse
      // strand the downstream bases ($2) extend the start and the upstream
      // bases ($1) extend the end of the location. The window is clamped to
      // the bounds of the source feature, and the residues are cut from that
      // same window so the reported coordinates always match the sequence.
      $statements['sequence_by_parent'] = 'PREPARE sequence_by_parent (int, int, int) AS
        SELECT
          srcname, srcfeature_id, strand, adjfmin, adjfmax, upstream, downstream,
          substring(residues from (adjfmin + 1) for (adjfmax - adjfmin)) as residues
        FROM (
          SELECT
            OF.name as srcname, FL.srcfeature_id, FL.strand, OF.residues,
            CASE
              WHEN FL.strand >= 0 THEN
                CASE
                  WHEN FL.fmin - $1 <= 0 THEN 0
                  ELSE FL.fmin - $1
                END
              WHEN FL.strand < 0 THEN
                CASE
                  WHEN FL.fmin - $2 <= 0 THEN 0
                  ELSE FL.fmin - $2
                END
            END as adjfmin,
            CASE
              WHEN FL.strand >= 0 THEN
                CASE
                  WHEN FL.fmax + $2 > OF.seqlen THEN OF.seqlen
                  ELSE FL.fmax + $2
                END
              WHEN FL.strand < 0 THEN
                CASE
                  WHEN FL.fmax + $1 > OF.seqlen THEN OF.seqlen
                  ELSE FL.fmax + $1
                END
            END as adjfmax,
            CASE
              WHEN FL.strand >= 0 THEN
                CASE
                  WHEN FL.fmin - $1 <= 0 THEN FL.fmin
                  ELSE $1
                END
              ELSE
                CASE
                  WHEN FL.fmax + $1 > OF.seqlen THEN OF.seqlen - FL.fmax
                  ELSE $1
                END
            END as upstream,
            CASE
              WHEN FL.strand >= 0 THEN
                CASE
                  WHEN FL.fmax + $2 > OF.seqlen THEN OF.seqlen - FL.fmax
                  ELSE $2
                END
              ELSE
                CASE
                  WHEN FL.fmin - $2 <= 0 THEN FL.fmin
                  ELSE $2
                END
            END as downstream
          FROM featureloc FL
            INNER JOIN feature SF on FL.feature_id = SF.feature_id
            INNER JOIN feature OF on FL.srcfeature_id = OF.feature_id
          WHERE SF.feature_id = $3
        ) as LOC';

      // Gets all of the sub features of a feature (arg #1) and orders them as
      // they appear on the reference feature (arg #2).
      $statements['sub_features'] = 'PREPARE sub_features (int, int) AS
        SELECT SF.feature_id, CVT.name as type_name, SF.type_id
        FROM feature_relationship FR
          INNER JOIN feature SF on SF.feature_id = FR.subject_id
          INNER JOIN cvterm CVT on CVT.cvterm_id = SF.type_id
          INNER JOIN featureloc FL on FL.feature_id = FR.subject_id
          INNER JOIN feature PF on PF.feature_id = FL.srcfeature_id
        WHERE FR.object_id = $1 and PF.feature_id = $2
        ORDER BY FL.fmin ASC';

      // render() counts the buffered sub feature rows itself and no longer
      // executes this statement. NOTE(review): it is still prepared in case
      // other code on the same connection executes it; confirm and drop if not.
      $statements['count_sub_features'] = 'PREPARE count_sub_features (int, int) AS
        SELECT count(*) as num_children
        FROM feature_relationship FR
          INNER JOIN feature SF on SF.feature_id = FR.subject_id
          INNER JOIN cvterm CVT on CVT.cvterm_id = SF.type_id
          INNER JOIN featureloc FL on FL.feature_id = FR.subject_id
          INNER JOIN feature PF on PF.feature_id = FL.srcfeature_id
        WHERE FR.object_id = $1 and PF.feature_id = $2';

      return $statements;
    }

    /**
     * Returns one of the 'display' options, or a default when it is not set.
     *
     * @param $name
     *   The key below $this->options['display'].
     * @param $default
     *   The value to return when the option has never been saved.
     *
     * @return
     *   The stored option value or $default.
     */
    protected function get_display_option($name, $default = NULL) {
      if (isset($this->options['display'][$name])) {
        return $this->options['display'][$name];
      }
      return $default;
    }

    /**
     * Defines the options form (form available to admin when they add a field
     * to a view).
     */
    function options_form(&$form, &$form_state) {
      parent::options_form($form, $form_state);

      $form['display'] = array(
        '#type' => 'fieldset',
        '#title' => t('Format Output'),
        '#description' => t('Alter the way a sequence is displayed'),
      );

      // The tag name is entity-encoded so that the description shows the text
      // "<br>" instead of emitting a real line break.
      $form['display']['num_bases_per_line'] = array(
        '#type' => 'textfield',
        '#title' => t('Number of bases per line'),
        '#description' => t('Specify the number of bases per line. An HTML &lt;br&gt; tag ' .
          'will be inserted after the number of bases indicated. If no value is ' .
          'provided, the sequence will be one long string (default)'),
        '#default_value' => $this->get_display_option('num_bases_per_line', ''),
      );

      $form['display']['derive_from_parent'] = array(
        '#type' => 'checkbox',
        '#title' => t('Derive sequence from parent'),
        '#description' => t('Rather than use the sequence from the \'residues\' of this feature, you may ' .
          'derive the sequence from the parent features to which it is aligned. This is useful in the case that the feature ' .
          'does not have sequence associated with it and we need to get it through its alignment. ' .
          'Note: this will slow queries with large numbers of results on the page.'),
        '#default_value' => $this->get_display_option('derive_from_parent', FALSE),
      );

      $form['display']['aggregate'] = array(
        '#type' => 'checkbox',
        '#title' => t('Aggregate sub features'),
        '#description' => t('If the feature has sub features (e.g. CDS of an mRNA) then check this ' .
          'box to filter the sequence to only include the sub features. Gaps between sub features will be ' .
          'excluded from the sequence. This is useful for obtaining a complete CDS from an mRNA ' .
          'without intronic sequence'),
        '#default_value' => $this->get_display_option('aggregate', FALSE),
      );
    }

    /**
     * Adds the feature_id column when the sequence is derived from the parent.
     *
     * render() needs the feature_id to run its additional queries.
     */
    function query() {
      parent::query();

      if ($this->get_display_option('derive_from_parent')) {
        $this->ensure_my_table();
        // Remember the alias chosen by the query object rather than guessing
        // it in render().
        $alias = $this->query->add_field($this->table, 'feature_id');
        if ($alias) {
          $this->feature_id_alias = $alias;
        }
      }
    }

    /**
     * Prior to display of results we want to format the sequence.
     *
     * @param $values
     *   The result row object of the view.
     *
     * @return
     *   The HTML for the sequence, an empty string when no sequence could be
     *   retrieved, or a plain message when sub features map ambiguously.
     */
    function render($values) {
      $num_bases_per_line = (int) $this->get_display_option('num_bases_per_line', 0);

      if ($this->get_display_option('derive_from_parent')) {
        $residues = $this->render_from_parents($values, $num_bases_per_line);
        if ($residues === FALSE) {
          return 'Cannot determine sequence. Sub features map to multiple locations';
        }
      }
      else {
        // Use what comes through from the feature record.
        $field = $this->field_alias;
        $raw = isset($values->$field) ? $values->$field : '';
        $residues = $this->wrap_sequence($raw, $num_bases_per_line);
      }

      // A fixed-width font only matters when the sequence is broken into lines.
      if ($residues and $num_bases_per_line > 0) {
        $residues = '<span style="font-family: monospace;">' . $residues . '</span>';
      }
      return $residues;
    }

    /**
     * Builds the output for every parent feature the feature is located on.
     *
     * @param $values
     *   The result row object of the view.
     * @param $num_bases_per_line
     *   Number of bases per line; 0 or less disables line breaking.
     *
     * @return
     *   The formatted output ('' when nothing was found) or FALSE when a sub
     *   feature maps to more than one location on the same parent.
     */
    protected function render_from_parents($values, $num_bases_per_line) {
      $feature_id = NULL;
      foreach (array($this->feature_id_alias, 'feature_feature_id') as $alias) {
        if (isset($values->$alias)) {
          $feature_id = $values->$alias;
          break;
        }
      }
      if (!$feature_id or !tripal_core_is_sql_prepared('sequence_by_parent')) {
        return '';
      }

      // The upstream and downstream values get set by the
      // tripal_views_handlers_filter_sequence.inc
      $sequence_q = array();
      if (isset($this->view->sequence_q) and is_array($this->view->sequence_q)) {
        $sequence_q = $this->view->sequence_q;
      }
      $upstream = empty($sequence_q['upstream']) ? 0 : (int) $sequence_q['upstream'];
      $downstream = empty($sequence_q['downstream']) ? 0 : (int) $sequence_q['downstream'];

      $parents = chado_query("EXECUTE sequence_by_parent (%d, %d, %d)", $upstream, $downstream, $feature_id);
      if (!$parents) {
        return '';
      }

      $aggregate = $this->get_display_option('aggregate');
      $residues = '';
      while ($parent = db_fetch_object($parents)) {
        // Reset per parent so types never leak from one parent to the next.
        $types = array();

        if ($aggregate) {
          // Ignore the residues returned above and rebuild the sequence from
          // the sub features located on this parent.
          $aggregated = $this->aggregate_sub_features($feature_id, $parent, $upstream, $downstream);
          if ($aggregated === FALSE) {
            return FALSE;
          }
          $seq = $aggregated['seq'];
          $types = $aggregated['types'];
        }
        else {
          $seq = $parent->residues;
        }

        // Get the reverse complement if the feature is on the reverse strand.
        $dir = 'forward';
        if ($parent->strand < 0) {
          $seq = $this->reverse_complement($seq);
          $dir = 'reverse';
        }

        // Now format for display.
        $seq = $this->wrap_sequence($seq, $num_bases_per_line);
        $residues .= ">" . check_plain($parent->srcname) . ":" . ($parent->adjfmin + 1) . ".." . $parent->adjfmax . " ($dir). ";
        if (count($types) > 0) {
          $residues .= "Excludes all bases but those of type(s): " . implode(', ', array_map('check_plain', $types)) . ". ";
        }
        if ($parent->upstream > 0) {
          $residues .= "Includes " . $parent->upstream . " bases upstream. ";
        }
        if ($parent->downstream > 0) {
          $residues .= "Includes " . $parent->downstream . " bases downstream. ";
        }
        $residues .= "<br>\n$seq\n<br>";
      }
      return $residues;
    }

    /**
     * Concatenates the sequences of the sub features located on one parent.
     *
     * Sub features arrive ordered by their start on the parent. Flanking bases
     * are only requested for the outermost sub features:
     * @code
     *   forward:  up....---->   ---->....down
     *   reverse:  down..<----   <----....up
     * @endcode
     * A single sub feature is both first and last and receives both flanks.
     *
     * @param $feature_id
     *   The feature whose sub features are aggregated.
     * @param $parent
     *   A sequence_by_parent row; srcfeature_id and strand are used.
     * @param $upstream
     *   Number of upstream bases to include.
     * @param $downstream
     *   Number of downstream bases to include.
     *
     * @return
     *   An array with the keys 'seq' (the concatenated residues) and 'types'
     *   (the distinct sub feature type names), or FALSE when a sub feature
     *   maps to more than one location.
     */
    protected function aggregate_sub_features($feature_id, $parent, $upstream, $downstream) {
      // Buffer the rows so that the last sub feature can be recognised without
      // a separate count query.
      $children = array();
      $result = chado_query("EXECUTE sub_features (%d, %d)", $feature_id, $parent->srcfeature_id);
      if ($result) {
        while ($child = db_fetch_object($result)) {
          $children[] = $child;
        }
      }

      $last = count($children) - 1;
      $forward = ($parent->strand >= 0);
      $seq = '';
      $types = array();

      foreach ($children as $i => $child) {
        // Keep up with the types.
        if (!in_array($child->type_name, $types, TRUE)) {
          $types[] = $child->type_name;
        }

        $is_first = ($i == 0);
        $is_last = ($i == $last);
        if ($forward) {
          $up = $is_first ? $upstream : 0;
          $down = $is_last ? $downstream : 0;
        }
        else {
          $down = $is_first ? $downstream : 0;
          $up = $is_last ? $upstream : 0;
        }

        $q = chado_query("EXECUTE sequence_by_parent (%d, %d, %d)", $up, $down, $child->feature_id);
        if (!$q) {
          continue;
        }

        // There should only be one mapping for each sub feature to the same
        // parent. If there are more then it cannot be resolved.
        $mappings = 0;
        while ($subseq = db_fetch_object($q)) {
          if ($mappings > 0) {
            return FALSE;
          }
          $seq .= $subseq->residues;
          $mappings++;
        }
      }

      return array('seq' => $seq, 'types' => $types);
    }

    /**
     * Returns the reverse complement of a sequence.
     *
     * NOTE(review): the call in this handler was spelled
     * trpial_feature_reverse_complement() (sic). The helper is defined outside
     * this file, so the original spelling is tried first and the corrected one
     * second; confirm which one the feature module actually provides.
     *
     * @param $seq
     *   The sequence to reverse complement.
     *
     * @return
     *   The reverse complement, or '' (plus a watchdog entry) when no helper is
     *   available, so that a forward sequence is never shown as reverse.
     */
    protected function reverse_complement($seq) {
      $candidates = array(
        'trpial_feature_reverse_complement',
        'tripal_feature_reverse_complement',
      );
      foreach ($candidates as $function) {
        if (function_exists($function)) {
          return $function($seq);
        }
      }
      watchdog('tripal_views_handler_field_sequence',
        'render: no reverse complement function is available', array(), WATCHDOG_ERROR);
      return '';
    }

    /**
     * Inserts an HTML line break after every $width bases.
     *
     * @param $seq
     *   The sequence to format.
     * @param $width
     *   Bases per line. wordwrap() cannot force-cut with a width of zero, so
     *   anything below 1 returns the sequence as one long string.
     *
     * @return
     *   The formatted sequence as a string.
     */
    protected function wrap_sequence($seq, $width) {
      $seq = (string) $seq;
      if ($width > 0) {
        return wordwrap($seq, $width, "<br>", TRUE);
      }
      return $seq;
    }

  }