data__sequence.inc 6.8 KB

<?php
  2. class data__sequence extends TripalField {
  3. // --------------------------------------------------------------------------
  4. // EDITABLE STATIC CONSTANTS
  5. //
  6. // The following constants SHOULD be set for each descendent class. They are
  7. // used by the static functions to provide information to Drupal about
  8. // the field and it's default widget and formatter.
  9. // --------------------------------------------------------------------------
  10. // The term that this field maps to. The format for the term should be:
  11. // [vocab]:[accession] where [vocab] is the short name of the vocabulary
  12. // and [acession] is the unique accession number for the term. This term
  13. // must already exist in the vocabulary storage backend. This
  14. // value should never be changed once fields exist for this type.
  15. public static $term = 'data:2044';
  16. // The default lable for this field.
  17. public static $label = 'Sequence';
  18. // The default description for this field.
  19. public static $description = 'A field for managing nucleotide and protein residues.';
  20. // Provide a list of global settings. These can be accessed witihn the
  21. // globalSettingsForm. When the globalSettingsForm is submitted then
  22. // Drupal will automatically change these settings for all fields.
  23. public static $settings = array(
  24. 'chado_table' => '',
  25. 'chado_column' => '',
  26. 'base_table' => '',
  27. );
  28. // Provide a list of instance specific settings. These can be access within
  29. // the instanceSettingsForm. When the instanceSettingsForm is submitted
  30. // then Drupal with automatically change these settings for the instnace.
  31. // It is recommended to put settings at the instance level whenever possible.
  32. public static $instance_settings = array();
  33. // Set this to the name of the storage backend that by default will support
  34. // this field.
  35. public static $storage = 'tripal_no_storage';
  36. // The default widget for this field.
  37. public static $default_widget = 'data__sequence_widget';
  38. // The default formatter for this field.
  39. public static $default_formatter = 'data__sequence_formatter';
  40. /**
  41. * @see TripalField::load()
  42. */
  43. public function load($entity, $details = array()) {
  44. $field_name = $this->field['field_name'];
  45. $feature = $details['record'];
  46. // We don't want to get the sequence for traditionally large types. They are
  47. // too big, bog down the web browser, take longer to load and it's not
  48. // reasonable to print them on a page.
  49. if(strcmp($feature->type_id->name,'scaffold') != 0 and
  50. strcmp($feature->type_id->name,'chromosome') != 0 and
  51. strcmp($feature->type_id->name,'supercontig') != 0 and
  52. strcmp($feature->type_id->name,'pseudomolecule') != 0) {
  53. $feature = chado_expand_var($feature,'field','feature.residues');
  54. $entity->{$field_name}['und'][0]['value'] = $feature->residues;
  55. }
  56. /* // Add in sequences from alignments.
  57. $options = array(
  58. 'return_array' => 1,
  59. 'include_fk' => array(
  60. 'srcfeature_id' => array(
  61. 'type_id' => 1
  62. ),
  63. 'feature_id' => array(
  64. 'type_id' => 1
  65. ),
  66. ),
  67. );
  68. $feature = chado_expand_var($feature, 'table', 'featureloc', $options);
  69. $featureloc_sequences = $this->get_featureloc_sequences($feature->feature_id, $feature->featureloc->feature_id);
  70. // Add in the coding sequences. It's faster to provide the SQL rather than
  71. // to use chado_generate_var based on the type.
  72. $sql = "
  73. SELECT F.*
  74. FROM {feature_relationship} FR
  75. INNER JOIN {feature} F on FR.subject_id = F.feature_id
  76. INNER JOIN {cvterm} CVT on CVT.cvterm_id = F.type_id
  77. INNER JOIN {cvterm} RCVT on RCVT.cvterm_id = FR.type_id
  78. INNER JOIN {featureloc} FL on FL.feature_id = F.feature_id
  79. WHERE
  80. FR.object_id = :feature_id and
  81. CVT.name = 'CDS' and
  82. RCVT.name = 'part_of'
  83. ORDER BY FR.rank ASC
  84. ";
  85. $results = chado_query($sql, array(':feature_id' => $feature->feature_id));
  86. $coding_seq = '';
  87. while ($CDS = $results->fetchObject()) {
  88. if ($CDS->residues) {
  89. $coding_seq .= $CDS->residues;
  90. }
  91. }
  92. if ($coding_seq) {
  93. $entity->{$field_name}['und'][$num_seqs++]['value'] = array(
  94. '@type' => 'SO:0000316',
  95. 'type' => 'coding_sequence',
  96. 'label' => 'Coding sequence (CDS)',
  97. 'defline' => tripal_get_fasta_defline($feature, 'CDS', NULL, '', strlen($coding_seq)),
  98. 'residues' => $coding_seq,
  99. );
  100. }
  101. foreach($featureloc_sequences as $src => $attrs){
  102. // the $attrs array has the following keys
  103. // * id: a unique identifier combining the feature id with the cvterm id
  104. // * type: the type of sequence (e.g. mRNA, etc)
  105. // * location: the alignment location
  106. // * defline: the definition line
  107. // * formatted_seq: the formatted sequences
  108. // * featureloc: the feature object aligned to
  109. $entity->{$field_name}['und'][$num_seqs++]['value'] = array(
  110. 'residues' => $attrs['residues'],
  111. '@type' => 'SO:0000110',
  112. 'type' => 'sequence_feature',
  113. 'defline' => tripal_get_fasta_defline($feature, '', $attrs['featureloc'], 'CDS', strlen($attrs['residues'])),
  114. 'label' => 'Sequence from alignment at ' . $attrs['location'],
  115. );
  116. // check to see if this alignment has any CDS. If so, generate a CDS sequence
  117. $cds_sequence = tripal_get_feature_sequences(
  118. array(
  119. 'feature_id' => $feature->feature_id,
  120. 'parent_id' => $attrs['featureloc']->srcfeature_id->feature_id,
  121. 'name' => $feature->name,
  122. 'featureloc_id' => $attrs['featureloc']->featureloc_id,
  123. ),
  124. array(
  125. 'derive_from_parent' => 1, // CDS are in parent-child relationships so we want to use the sequence from the parent
  126. 'aggregate' => 1, // we want to combine all CDS for this feature into a single sequence
  127. 'sub_feature_types' => array('CDS'), // we're looking for CDS features
  128. 'is_html' => 0
  129. )
  130. );
  131. if (count($cds_sequence) > 0) {
  132. // the tripal_get_feature_sequences() function can return multiple sequences
  133. // if a feature is aligned to multiple places. In the case of CDSs we expect
  134. // that one mRNA is only aligned to a single location on the assembly so we
  135. // can access the CDS sequence with index 0.
  136. if ($cds_sequence[0]['residues']) {
  137. $entity->{$field_name}['und'][$num_seqs++]['value'] = array(
  138. 'residues' => $cds_sequence[0]['residues'],
  139. '@type' => 'SO:0000316',
  140. 'type' => 'coding_sequence',
  141. 'defline' => tripal_get_fasta_defline($feature, '', $attrs['featureloc'], 'CDS', $cds_sequence[0]['length']),
  142. 'label' => 'Coding sequence (CDS) from alignment at ' . $attrs['location'],
  143. );
  144. }
  145. }
  146. } */
  147. }
  148. }