Selaa lähdekoodia

Simplify data__sequence field.

Lacey Sanderson 6 vuotta sitten
vanhempi
commit
c5af230a4d

+ 32 - 99
tripal_chado/includes/TripalFields/data__sequence/data__sequence.inc

@@ -64,6 +64,18 @@ class data__sequence extends ChadoField {
         'searchable' => FALSE,
         'type' => 'xs:string',
         'readonly' => FALSE,
+        'elements' => [
+          'data:2044' => [
+            'name' => 'sequence',
+            'searchable' => FALSE,
+          ],
+          // @todo add this cvterm to the db.
+          // https://www.ebi.ac.uk/ols/ontologies/ncit/terms?iri=http%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FNCIT_C47845
+          'NCIT:C47845' => [
+            'name' => 'FASTA Format',
+            'searchable' => FALSE,
+          ],
+        ],
       ],
     ];
   }
@@ -73,107 +85,28 @@ class data__sequence extends ChadoField {
    */
   public function load($entity) {
     $field_name = $this->field['field_name'];
-    $feature = $entity->chado_record;
+    $feature = (array) $entity->chado_record;
 
-    $feature = chado_expand_var($feature, 'field', 'feature.residues');
-    $entity->{$field_name}['und'][0]['value'] = $feature->residues;
+    // Retrieve all the sequences for the given feature.
+    $sequences = chado_get_feature_sequences($feature, []);
+    dpm($sequences, 'seq');
+    // If there are no direct residues then try to derive from the parent.
+    if (empty($sequences) OR (sizeof($sequences) == 1 AND empty($sequences[0]['residues']))) {
+      $sequences = chado_get_feature_sequences($feature, ['derive_from_parent' => 1]);
+    }
 
-    /* // Add in sequences from alignments.
-       $options = array(
-         'return_array' => 1,
-         'include_fk' => array(
-           'srcfeature_id' => array(
-             'type_id' => 1
-           ),
-           'feature_id' => array(
-             'type_id' => 1
-           ),
-         ),
-       );
-       $feature = chado_expand_var($feature, 'table', 'featureloc', $options);
-       $featureloc_sequences = $this->get_featureloc_sequences($feature->feature_id, $feature->featureloc->feature_id);
-   
-       // Add in the coding sequences. It's faster to provide the SQL rather than
-       // to use chado_generate_var based on the type.
-       $sql = "
-         SELECT F.*
-         FROM {feature_relationship} FR
-           INNER JOIN {feature} F on FR.subject_id = F.feature_id
-           INNER JOIN {cvterm} CVT on CVT.cvterm_id = F.type_id
-           INNER JOIN {cvterm} RCVT on RCVT.cvterm_id = FR.type_id
-           INNER JOIN {featureloc} FL on FL.feature_id = F.feature_id
-         WHERE
-           FR.object_id = :feature_id and
-           CVT.name = 'CDS' and
-           RCVT.name = 'part_of'
-         ORDER BY FR.rank ASC
-       ";
-       $results = chado_query($sql, array(':feature_id' => $feature->feature_id));
-       $coding_seq = '';
-       while ($CDS = $results->fetchObject()) {
-         if ($CDS->residues) {
-           $coding_seq .= $CDS->residues;
-         }
-       }
-       if ($coding_seq) {
-         $entity->{$field_name}['und'][$num_seqs++]['value'] = array(
-           '@type' => 'SO:0000316',
-           'type' => 'coding_sequence',
-           'label' => 'Coding sequence (CDS)',
-           'defline' => chado_get_fasta_defline($feature, 'CDS', NULL, '', strlen($coding_seq)),
-           'residues' => $coding_seq,
-         );
-       }
-   
-       foreach($featureloc_sequences as $src => $attrs){
-         // the $attrs array has the following keys
-         //   * id:  a unique identifier combining the feature id with the cvterm id
-         //   * type: the type of sequence (e.g. mRNA, etc)
-         //   * location:  the alignment location
-         //   * defline: the definition line
-         //   * formatted_seq: the formatted sequences
-         //   * featureloc:  the feature object aligned to
-         $entity->{$field_name}['und'][$num_seqs++]['value'] = array(
-           'residues' => $attrs['residues'],
-           '@type' => 'SO:0000110',
-           'type' => 'sequence_feature',
-           'defline' => chado_get_fasta_defline($feature, '', $attrs['featureloc'], 'CDS', strlen($attrs['residues'])),
-           'label' => 'Sequence from alignment at ' . $attrs['location'],
-         );
-   
-   
-         // check to see if this alignment has any CDS. If so, generate a CDS sequence
-         $cds_sequence = chado_get_feature_sequences(
-             array(
-               'feature_id' => $feature->feature_id,
-               'parent_id' => $attrs['featureloc']->srcfeature_id->feature_id,
-               'name' => $feature->name,
-               'featureloc_id' => $attrs['featureloc']->featureloc_id,
-             ),
-             array(
-               'derive_from_parent' => 1, // CDS are in parent-child relationships so we want to use the sequence from the parent
-               'aggregate' => 1, // we want to combine all CDS for this feature into a single sequence
-               'sub_feature_types' => array('CDS'), // we're looking for CDS features
-               'is_html' => 0
-             )
-             );
-   
-         if (count($cds_sequence) > 0) {
-           // the chado_get_feature_sequences() function can return multiple sequences
-           // if a feature is aligned to multiple places. In the case of CDSs we expect
-           // that one mRNA is only aligned to a single location on the assembly so we
-           // can access the CDS sequence with index 0.
-           if ($cds_sequence[0]['residues']) {
-             $entity->{$field_name}['und'][$num_seqs++]['value'] = array(
-               'residues' => $cds_sequence[0]['residues'],
-               '@type' => 'SO:0000316',
-               'type' => 'coding_sequence',
-               'defline' => chado_get_fasta_defline($feature, '', $attrs['featureloc'], 'CDS', $cds_sequence[0]['length']),
-               'label' => 'Coding sequence (CDS) from alignment at  ' . $attrs['location'],
-             );
-           }
-         }
-       } */
+    // Add each sequence to the field as a fasta record.
+    // NOTE: We keep the residues separate in case the formatter wants to
+    //   determine its own fasta record defline.
+    // @debug dpm($sequences, 'sequences');
+    foreach ($sequences as $k => $seq) {
+      if (!empty($seq['residues'])) {
+        $entity->{$field_name}['und'][$k]['value'] = [
+          'NCIT:C47845' => '>' . $seq['defline'] . "\n" . $seq['residues'],
+          'data:2044' => $seq['residues'],
+        ];
+      }
+    }
   }
 }
 

+ 13 - 9
tripal_chado/includes/TripalFields/data__sequence/data__sequence_formatter.inc

@@ -14,7 +14,7 @@ class data__sequence_formatter extends ChadoFieldFormatter {
   public function view(&$element, $entity_type, $entity, $langcode, $items, $display) {
 
     // If there are no items, we don't want to return any markup.
-    if (count($items) == 0 or (count($items) == 1 and empty($items[0]['value']))) {
+    if (count($items) == 0 or (count($items) == 1 and empty($items[0]['value']['data:2044']))) {
       $element[0] = [
         '#type' => 'markup',
         '#markup' => 'No sequence is available.',
@@ -23,13 +23,17 @@ class data__sequence_formatter extends ChadoFieldFormatter {
     }
 
     $num_bases = 50;
-    $content = '<pre class="residues-formatter">';
-    $content .= wordwrap($items[0]['value'], $num_bases, "<br>", TRUE);
-    $content .= '</pre>';
-    $element[0] = [
-      // We create a render array to produce the desired markup,
-      '#type' => 'markup',
-      '#markup' => $content,
-    ];
+
+    foreach ($items as $k => $d) {
+      $rendered_seq = '<pre class="residues-formatter">';
+      $rendered_seq .= $d['value']['NCIT:C47845']; // render the fasta record.
+      $rendered_seq .= '</pre>';
+
+      $element[$k] = [
+        // We create a render array to produce the desired markup,
+        '#type' => 'markup',
+        '#markup' => $rendered_seq,
+      ];
+    }
   }
 }