data__sequence.inc 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175
  1. <?php
  2. class data__sequence extends TripalField {
  3. // --------------------------------------------------------------------------
  4. // EDITABLE STATIC CONSTANTS
  5. //
  6. // The following constants SHOULD be set for each descendent class. They are
  7. // used by the static functions to provide information to Drupal about
  8. // the field and it's default widget and formatter.
  9. // --------------------------------------------------------------------------
  10. // The default lable for this field.
  11. public static $default_label = 'Sequence';
  12. // The default description for this field.
  13. public static $description = 'A field for managing nucleotide and protein residues.';
  14. // Provide a list of global settings. These can be accessed witihn the
  15. // globalSettingsForm. When the globalSettingsForm is submitted then
  16. // Drupal will automatically change these settings for all fields.
  17. public static $default_settings = array(
  18. 'chado_table' => '',
  19. 'chado_column' => '',
  20. 'base_table' => '',
  21. );
  22. // Provide a list of instance specific settings. These can be access within
  23. // the instanceSettingsForm. When the instanceSettingsForm is submitted
  24. // then Drupal with automatically change these settings for the instnace.
  25. // It is recommended to put settings at the instance level whenever possible.
  26. // If you override this variable in a child class be sure to replicate the
  27. // term_name, term_vocab, term_accession and term_fixed keys as these are
  28. // required for all TripalFields.
  29. public static $default_instance_settings = array(
  30. // The short name for the vocabulary (e.g. shcema, SO, GO, PATO, etc.).
  31. 'term_vocabulary' => 'data',
  32. // The name of the term.
  33. 'term_name' => 'sequence',
  34. // The unique ID (i.e. accession) of the term.
  35. 'term_accession' => '2044',
  36. // Set to TRUE if the site admin is allowed to change the term
  37. // type. This will create form elements when editing the field instance
  38. // to allow the site admin to change the term settings above.
  39. 'term_fixed' => TRUE,
  40. );
  41. // Set this to the name of the storage backend that by default will support
  42. // this field.
  43. public static $storage = 'field_chado_storage';
  44. // The default widget for this field.
  45. public static $default_widget = 'data__sequence_widget';
  46. // The default formatter for this field.
  47. public static $default_formatter = 'data__sequence_formatter';
  48. /**
  49. * @see TripalField::load()
  50. */
  51. public function load($entity, $details = array()) {
  52. $field_name = $this->field['field_name'];
  53. $feature = $details['record'];
  54. // We don't want to get the sequence for traditionally large types. They are
  55. // too big, bog down the web browser, take longer to load and it's not
  56. // reasonable to print them on a page.
  57. if(strcmp($feature->type_id->name,'scaffold') != 0 and
  58. strcmp($feature->type_id->name,'chromosome') != 0 and
  59. strcmp($feature->type_id->name,'supercontig') != 0 and
  60. strcmp($feature->type_id->name,'pseudomolecule') != 0) {
  61. $feature = chado_expand_var($feature,'field','feature.residues');
  62. $entity->{$field_name}['und'][0]['value'] = $feature->residues;
  63. }
  64. /* // Add in sequences from alignments.
  65. $options = array(
  66. 'return_array' => 1,
  67. 'include_fk' => array(
  68. 'srcfeature_id' => array(
  69. 'type_id' => 1
  70. ),
  71. 'feature_id' => array(
  72. 'type_id' => 1
  73. ),
  74. ),
  75. );
  76. $feature = chado_expand_var($feature, 'table', 'featureloc', $options);
  77. $featureloc_sequences = $this->get_featureloc_sequences($feature->feature_id, $feature->featureloc->feature_id);
  78. // Add in the coding sequences. It's faster to provide the SQL rather than
  79. // to use chado_generate_var based on the type.
  80. $sql = "
  81. SELECT F.*
  82. FROM {feature_relationship} FR
  83. INNER JOIN {feature} F on FR.subject_id = F.feature_id
  84. INNER JOIN {cvterm} CVT on CVT.cvterm_id = F.type_id
  85. INNER JOIN {cvterm} RCVT on RCVT.cvterm_id = FR.type_id
  86. INNER JOIN {featureloc} FL on FL.feature_id = F.feature_id
  87. WHERE
  88. FR.object_id = :feature_id and
  89. CVT.name = 'CDS' and
  90. RCVT.name = 'part_of'
  91. ORDER BY FR.rank ASC
  92. ";
  93. $results = chado_query($sql, array(':feature_id' => $feature->feature_id));
  94. $coding_seq = '';
  95. while ($CDS = $results->fetchObject()) {
  96. if ($CDS->residues) {
  97. $coding_seq .= $CDS->residues;
  98. }
  99. }
  100. if ($coding_seq) {
  101. $entity->{$field_name}['und'][$num_seqs++]['value'] = array(
  102. '@type' => 'SO:0000316',
  103. 'type' => 'coding_sequence',
  104. 'label' => 'Coding sequence (CDS)',
  105. 'defline' => tripal_get_fasta_defline($feature, 'CDS', NULL, '', strlen($coding_seq)),
  106. 'residues' => $coding_seq,
  107. );
  108. }
  109. foreach($featureloc_sequences as $src => $attrs){
  110. // the $attrs array has the following keys
  111. // * id: a unique identifier combining the feature id with the cvterm id
  112. // * type: the type of sequence (e.g. mRNA, etc)
  113. // * location: the alignment location
  114. // * defline: the definition line
  115. // * formatted_seq: the formatted sequences
  116. // * featureloc: the feature object aligned to
  117. $entity->{$field_name}['und'][$num_seqs++]['value'] = array(
  118. 'residues' => $attrs['residues'],
  119. '@type' => 'SO:0000110',
  120. 'type' => 'sequence_feature',
  121. 'defline' => tripal_get_fasta_defline($feature, '', $attrs['featureloc'], 'CDS', strlen($attrs['residues'])),
  122. 'label' => 'Sequence from alignment at ' . $attrs['location'],
  123. );
  124. // check to see if this alignment has any CDS. If so, generate a CDS sequence
  125. $cds_sequence = tripal_get_feature_sequences(
  126. array(
  127. 'feature_id' => $feature->feature_id,
  128. 'parent_id' => $attrs['featureloc']->srcfeature_id->feature_id,
  129. 'name' => $feature->name,
  130. 'featureloc_id' => $attrs['featureloc']->featureloc_id,
  131. ),
  132. array(
  133. 'derive_from_parent' => 1, // CDS are in parent-child relationships so we want to use the sequence from the parent
  134. 'aggregate' => 1, // we want to combine all CDS for this feature into a single sequence
  135. 'sub_feature_types' => array('CDS'), // we're looking for CDS features
  136. 'is_html' => 0
  137. )
  138. );
  139. if (count($cds_sequence) > 0) {
  140. // the tripal_get_feature_sequences() function can return multiple sequences
  141. // if a feature is aligned to multiple places. In the case of CDSs we expect
  142. // that one mRNA is only aligned to a single location on the assembly so we
  143. // can access the CDS sequence with index 0.
  144. if ($cds_sequence[0]['residues']) {
  145. $entity->{$field_name}['und'][$num_seqs++]['value'] = array(
  146. 'residues' => $cds_sequence[0]['residues'],
  147. '@type' => 'SO:0000316',
  148. 'type' => 'coding_sequence',
  149. 'defline' => tripal_get_fasta_defline($feature, '', $attrs['featureloc'], 'CDS', $cds_sequence[0]['length']),
  150. 'label' => 'Coding sequence (CDS) from alignment at ' . $attrs['location'],
  151. );
  152. }
  153. }
  154. } */
  155. }
  156. }