Browse Source

Merge branch '7.x-3.x' into 1040-tv3-gff3_performance

Stephen Ficklin 4 years ago
parent
commit
e490f5de55

+ 4 - 0
docs/extensions/data_input.rst

@@ -6,6 +6,10 @@ The following modules provide interfaces for collection and/or loading of biolog
 Genotype Loader
 ----------------
 
+.. image:: https://tripal.readthedocs.io/en/7.x-3.x/_images/Tripal-Gold.png
+  :target: https://tripal.readthedocs.io/en/7.x-3.x/extensions/module_rating.html#Gold
+  :alt: Tripal Rating: Gold
+  
 A Drush-based loader for VCF files that follows the genotype storage rules outlined by ND genotypes. It has been optimized to handle very large files and supports customization of ontology terms used.
 
 `Documentation <https://genotypes-loader.readthedocs.io/en/latest/>`__

+ 7 - 5
tripal/includes/tripal.fields.inc

@@ -911,11 +911,13 @@ function tripal_form_field_ui_display_overview_form_alter(&$form, &$form_state,
   $fields_names = element_children($form['fields']);
   foreach ($fields_names as $field_name) {
     $field_info = field_info_field($field_name);
-    if ($field_info['type'] == 'kvproperty_adder') {
-      unset($form['fields'][$field_name]);
-    }
-    if ($field_info['type'] == 'cvterm_class_adder') {
-      unset($form['fields'][$field_name]);
+    if ($field_info) {
+      if ($field_info['type'] == 'kvproperty_adder') {
+        unset($form['fields'][$field_name]);
+      }
+      if ($field_info['type'] == 'cvterm_class_adder') {
+        unset($form['fields'][$field_name]);
+      }
     }
   }
 }

+ 1 - 7
tripal_chado/includes/TripalFields/data__protein_sequence/data__protein_sequence.inc

@@ -93,18 +93,12 @@ class data__protein_sequence extends ChadoField {
       WHERE
         FR.object_id = :feature_id and
         CVT.name = 'polypeptide' and
-        RCVT.name  IN ('derives_from', 'part_of')
+        RCVT.name IN ('derives_from', 'part_of')
       ORDER BY FR.rank ASC
     ";
     $proteins = chado_query($sql, [':feature_id' => $feature->feature_id]);
     while ($protein = $proteins->fetchObject()) {
       $entity->{$field_name}['und'][$num_seqs]['value'] = $protein->residues;
-      // Because we'll be saving a feature we need to maintain all of it's
-      // columns in the feature table. The following will add them all.
-      $columns = get_object_vars($protein);
-      foreach ($columns as $colname => $value) {
-        $entity->{$field_name}['und'][$num_seqs]['chado-feature__' . $colname] = $value;
-      }
       $num_seqs++;
     }
   }

+ 2 - 1
tripal_chado/includes/TripalFields/data__protein_sequence/data__protein_sequence_formatter.inc

@@ -12,7 +12,8 @@ class data__protein_sequence_formatter extends ChadoFieldFormatter {
    * @see TripalFieldFormatter::view()
    */
   public function view(&$element, $entity_type, $entity, $langcode, $items, $display) {
-    $content = 'There is no protein sequence.';
+
+    $content = 'There is no protein sequence available.';
     if (count($items) > 0 and $items[0]['value']) {
       $num_bases = 50;
       $content = '<pre class="protein-residues-formatter">';

+ 1 - 98
tripal_chado/includes/TripalFields/data__sequence/data__sequence.inc

@@ -15,7 +15,7 @@ class data__sequence extends ChadoField {
   public static $default_label = 'Sequence';
 
   // The default description for this field.
-  public static $description = 'A field for managing nucleotide and protein residues.';
+  public static $description = 'A field for managing the primary sequence for a feature.';
 
   // Provide a list of instance specific settings. These can be accessed within
   // the instanceSettingsForm.  When the instanceSettingsForm is submitted
@@ -77,103 +77,6 @@ class data__sequence extends ChadoField {
 
     $feature = chado_expand_var($feature, 'field', 'feature.residues');
     $entity->{$field_name}['und'][0]['value'] = $feature->residues;
-
-    /* // Add in sequences from alignments.
-       $options = array(
-         'return_array' => 1,
-         'include_fk' => array(
-           'srcfeature_id' => array(
-             'type_id' => 1
-           ),
-           'feature_id' => array(
-             'type_id' => 1
-           ),
-         ),
-       );
-       $feature = chado_expand_var($feature, 'table', 'featureloc', $options);
-       $featureloc_sequences = $this->get_featureloc_sequences($feature->feature_id, $feature->featureloc->feature_id);
-   
-       // Add in the coding sequences. It's faster to provide the SQL rather than
-       // to use chado_generate_var based on the type.
-       $sql = "
-         SELECT F.*
-         FROM {feature_relationship} FR
-           INNER JOIN {feature} F on FR.subject_id = F.feature_id
-           INNER JOIN {cvterm} CVT on CVT.cvterm_id = F.type_id
-           INNER JOIN {cvterm} RCVT on RCVT.cvterm_id = FR.type_id
-           INNER JOIN {featureloc} FL on FL.feature_id = F.feature_id
-         WHERE
-           FR.object_id = :feature_id and
-           CVT.name = 'CDS' and
-           RCVT.name = 'part_of'
-         ORDER BY FR.rank ASC
-       ";
-       $results = chado_query($sql, array(':feature_id' => $feature->feature_id));
-       $coding_seq = '';
-       while ($CDS = $results->fetchObject()) {
-         if ($CDS->residues) {
-           $coding_seq .= $CDS->residues;
-         }
-       }
-       if ($coding_seq) {
-         $entity->{$field_name}['und'][$num_seqs++]['value'] = array(
-           '@type' => 'SO:0000316',
-           'type' => 'coding_sequence',
-           'label' => 'Coding sequence (CDS)',
-           'defline' => chado_get_fasta_defline($feature, 'CDS', NULL, '', strlen($coding_seq)),
-           'residues' => $coding_seq,
-         );
-       }
-   
-       foreach($featureloc_sequences as $src => $attrs){
-         // the $attrs array has the following keys
-         //   * id:  a unique identifier combining the feature id with the cvterm id
-         //   * type: the type of sequence (e.g. mRNA, etc)
-         //   * location:  the alignment location
-         //   * defline: the definition line
-         //   * formatted_seq: the formatted sequences
-         //   * featureloc:  the feature object aligned to
-         $entity->{$field_name}['und'][$num_seqs++]['value'] = array(
-           'residues' => $attrs['residues'],
-           '@type' => 'SO:0000110',
-           'type' => 'sequence_feature',
-           'defline' => chado_get_fasta_defline($feature, '', $attrs['featureloc'], 'CDS', strlen($attrs['residues'])),
-           'label' => 'Sequence from alignment at ' . $attrs['location'],
-         );
-   
-   
-         // check to see if this alignment has any CDS. If so, generate a CDS sequence
-         $cds_sequence = chado_get_feature_sequences(
-             array(
-               'feature_id' => $feature->feature_id,
-               'parent_id' => $attrs['featureloc']->srcfeature_id->feature_id,
-               'name' => $feature->name,
-               'featureloc_id' => $attrs['featureloc']->featureloc_id,
-             ),
-             array(
-               'derive_from_parent' => 1, // CDS are in parent-child relationships so we want to use the sequence from the parent
-               'aggregate' => 1, // we want to combine all CDS for this feature into a single sequence
-               'sub_feature_types' => array('CDS'), // we're looking for CDS features
-               'is_html' => 0
-             )
-             );
-   
-         if (count($cds_sequence) > 0) {
-           // the chado_get_feature_sequences() function can return multiple sequences
-           // if a feature is aligned to multiple places. In the case of CDSs we expect
-           // that one mRNA is only aligned to a single location on the assembly so we
-           // can access the CDS sequence with index 0.
-           if ($cds_sequence[0]['residues']) {
-             $entity->{$field_name}['und'][$num_seqs++]['value'] = array(
-               'residues' => $cds_sequence[0]['residues'],
-               '@type' => 'SO:0000316',
-               'type' => 'coding_sequence',
-               'defline' => chado_get_fasta_defline($feature, '', $attrs['featureloc'], 'CDS', $cds_sequence[0]['length']),
-               'label' => 'Coding sequence (CDS) from alignment at  ' . $attrs['location'],
-             );
-           }
-         }
-       } */
   }
 }
 

+ 40 - 36
tripal_chado/includes/TripalFields/data__sequence_coordinates/data__sequence_coordinates.inc

@@ -224,49 +224,53 @@ class data__sequence_coordinates extends ChadoField {
     $strand_term = chado_get_semweb_term('featureloc', 'strand');
     $phase_term = chado_get_semweb_term('featureloc', 'phase');
 
-    $options = [
-      'return_array' => TRUE,
-      'order_by' => ['rank' => 'ASC'],
-    ];
-    $feature = chado_expand_var($feature, 'table', 'featureloc', $options);
-
     // Set some defauls for the empty record
     $entity->{$field_name}['und'][0] = [
       'value' => '',
     ];
 
-    // Get the featureloc records that this feature is aligned to.
-    $aligned = $feature->featureloc->feature_id;
-    if ($aligned) {
-      foreach ($aligned as $index => $featureloc) {
-        $srcfeature = $featureloc->srcfeature_id->name;
+    // Get the featureloc records that this feature is aligned to. We use
+    // this SQL rather than the chado_expand_var function because we don't
+    // want the residues included from the srcfeature_id which may be huge
+    // and overrun memory.
+    $featurelocs_sql = "
+      SELECT SRCF.name, FL.srcfeature_id, FL.strand, FL.fmin, FL.fmax, FL,phase
+      FROM {featureloc} FL
+        INNER JOIN {feature} SRCF on SRCF.feature_id = FL.srcfeature_id
+      WHERE FL.feature_id = :feature_id
+      ORDER BY rank ASC
+    ";
+    $aligned = chado_query($featurelocs_sql, [':feature_id' => $feature->feature_id]);
+    $index = 0;
+    while ($featureloc = $aligned->fetchObject()) {
+      $srcfeature = $featureloc->name;
+      $strand = '';
+      if ($featureloc->strand == 1) {
+        $strand = '+';
+      }
+      elseif ($featureloc->strand == -1) {
+        $strand = '-';
+      }
+      else {
         $strand = '';
-        if ($featureloc->strand == 1) {
-          $strand = '+';
-        }
-        elseif ($featureloc->strand == -1) {
-          $strand = '-';
-        }
-        else {
-          $strand = '';
-        }
-        $fmin = $featureloc->fmin + 1;
-        $fmax = $featureloc->fmax;
-        $entity->{$field_name}['und'][$index] = [
-          'value' => [
-            $description => $srcfeature . ':' . $fmin . '-' . $fmax . $strand,
-            $reference_term => $srcfeature,
-            $fmin_term => $fmin,
-            $fmax_term => $fmax,
-            $strand_term => $strand,
-            $phase_term => $featureloc->phase,
-          ],
-        ];
-        $sentity_id = chado_get_record_entity_by_table('feature_id', $featureloc->srcfeature_id->feature_id);
-        if ($sentity_id) {
-          $entity->{$field_name}['und'][0]['value']['entity'] = 'TripalEntity:' . $sentity_id;
-        }
       }
+      $fmin = $featureloc->fmin + 1;
+      $fmax = $featureloc->fmax;
+      $entity->{$field_name}['und'][$index] = [
+        'value' => [
+          $description => $srcfeature . ':' . $fmin . '-' . $fmax . $strand,
+          $reference_term => $srcfeature,
+          $fmin_term => $fmin,
+          $fmax_term => $fmax,
+          $strand_term => $strand,
+          $phase_term => $featureloc->phase,
+        ],
+      ];
+      $sentity_id = chado_get_record_entity_by_table('feature_id', $featureloc->srcfeature_id);
+      if ($sentity_id) {
+        $entity->{$field_name}['und'][0]['value']['entity'] = 'TripalEntity:' . $sentity_id;
+      }
+      $index++;
     }
   }
 }

+ 421 - 0
tripal_chado/includes/TripalFields/data__sequence_record/data__sequence_record.inc

@@ -0,0 +1,421 @@
+<?php
+
+class data__sequence_record extends ChadoField {
+
+
+  // --------------------------------------------------------------------------
+  //                     EDITABLE STATIC CONSTANTS
+  //
+  // The following constants SHOULD be set for each descendent class.  They are
+  // used by the static functions to provide information to Drupal about
+  // the field and its default widget and formatter.
+  // --------------------------------------------------------------------------
+
+  // The default label for this field.
+  public static $default_label = 'Sequences';
+
+  // The default description for this field.
+  public static $description = 'A field for displaying all sequences associated with a feature along with their metadata.';
+
+  // Provide a list of instance specific settings. These can be accessed within
+  // the instanceSettingsForm.  When the instanceSettingsForm is submitted
+  // then Drupal will automatically change these settings for the instance.
+  // It is recommended to put settings at the instance level whenever possible.
+  // If you override this variable in a child class be sure to replicate the
+  // term_vocabulary, term_name, term_accession and term_fixed keys as these
+  // are required for all TripalFields.
+  public static $default_instance_settings = [
+    // The short name for the vocabulary (e.g. schema, SO, GO, PATO, etc.).
+    'term_vocabulary' => 'data',
+    // The name of the term.
+    'term_name' => 'sequence_record',
+    // The unique ID (i.e. accession) of the term.
+    'term_accession' => '0849',
+    // Set to TRUE if the site admin is allowed to change the term
+    // type. This will create form elements when editing the field instance
+    // to allow the site admin to change the term settings above.
+    'term_fixed' => FALSE,
+  ];
+
+  // Indicates the download formats for this field.  Each entry must be the
+  // name of a child class of the TripalFieldDownloader.
+  public static $download_formatters = [
+    'TripalTabDownloader',
+    'TripalCSVDownloader',
+    'TripalNucFASTADownloader',
+  ];
+
+  // The default widget for this field.
+  public static $default_widget = 'data__sequence_record_widget';
+
+  // The default formatter for this field.
+  public static $default_formatter = 'data__sequence_record_formatter';
+
+
+  /**
+   * Describes the single element this field exposes.
+   *
+   * The element is keyed by the field's term ID and is declared as a
+   * read-only 'xs:complex' value with no operations, sorting or searching.
+   *
+   * @see TripalField::elementInfo()
+   */
+  public function elementInfo() {
+    $element = [
+      'operations' => [],
+      'sortable' => FALSE,
+      'searchable' => FALSE,
+      'type' => 'xs:complex',
+      'readonly' => TRUE,
+    ];
+
+    return [$this->getFieldTermID() => $element];
+  }
+
+  /**
+   * Appends every sequence found for the entity's feature to this field.
+   *
+   * @param $entity
+   *   The entity being loaded. Its chado_record is used as the feature and
+   *   the sequence values are added under this field's name.
+   *
+   * @see TripalField::load()
+   */
+  public function load($entity) {
+    $field_name = $this->field['field_name'];
+
+    // Expand the residues column of the Chado feature table so the primary
+    // sequence is available on the feature record.
+    $feature = chado_expand_var($entity->chado_record, 'field', 'feature.residues');
+
+    // The primary sequence is always added.
+    $this->addPrimary($entity, $feature, $field_name);
+
+    // Any feature that is not an mRNA only gets the sequence derived from
+    // the reference.
+    if ($feature->type_id->name != 'mRNA') {
+      $this->addGenericReference($entity, $feature, $field_name);
+      return;
+    }
+
+    // An mRNA gets its gene parent and full length mRNA. The CDS and protein
+    // are only added when addFLmRNA() returned at least one featureloc.
+    $this->addGeneParent($entity, $feature, $field_name);
+    $featurelocs = $this->addFLmRNA($entity, $feature, $field_name);
+    if (count($featurelocs) > 0) {
+      $this->addCDS($entity, $feature, $field_name, $featurelocs);
+      $this->addProtein($entity, $feature, $field_name);
+    }
+  }
+
+  /**
+   * Adds the feature's sequence as derived from its reference location(s).
+   *
+   * One value is appended to the field for every sequence returned by
+   * chado_get_feature_sequences() with the 'derive_from_parent' option set.
+   *
+   * @param $entity
+   *   The entity whose field values are appended to.
+   * @param $feature
+   *   The Chado feature record; its feature_id and type_id->name are used.
+   * @param $field_name
+   *   The name of the field on the entity that receives the values.
+   *
+   * @return
+   *   An array with the featureloc record of each sequence that was added.
+   */
+  private function addGenericReference(&$entity, $feature, $field_name) {
+    $residues_key = chado_get_semweb_term('feature', 'residues');
+    $length_key = chado_get_semweb_term('feature', 'seqlen');
+    $md5_key = chado_get_semweb_term('feature', 'md5checksum');
+
+    $type_name = $feature->type_id->name;
+    $sequences = chado_get_feature_sequences(
+      ['feature_id' => $feature->feature_id],
+      ['derive_from_parent' => 1]
+    );
+
+    $locations = [];
+    foreach ($sequences as $sequence) {
+      $location = $this->getFeatureLoc($sequence['featureloc_id']);
+      $coords = $this->getSequenceCoords($location);
+      $residues = $sequence['residues'];
+      $num_bases = strlen($residues);
+
+      // The keys are assigned in the order they should appear in the value.
+      $value = [];
+      $value[$residues_key] = $residues;
+      $value['rdfs:label'] = ucfirst(str_replace('_', ' ', $type_name)) . ' Sequence (' . number_format($num_bases) . 'bp)';
+      $value['schema:description'] = 'This sequence was extracted from the reference sequence location at ' . $coords['schema:description'] . '.';
+      $value['data:2012'] = $coords;
+      $value[$length_key] = $num_bases;
+      $value[$md5_key] = md5($residues);
+      $value['rdfs:type'] = $type_name;
+
+      $entity->{$field_name}['und'][]['value'] = $value;
+      $locations[] = $location;
+    }
+
+    return $locations;
+  }
+
+  /**
+   * Adds the primary sequence stored in the residues of the feature itself.
+   *
+   * Nothing is added when the feature has no residues.
+   *
+   * @param $entity
+   *   The entity whose field values are appended to.
+   * @param $feature
+   *   The Chado feature record; its residues, seqlen, md5checksum and
+   *   type_id->name are used.
+   * @param $field_name
+   *   The name of the field on the entity that receives the value.
+   */
+  private function addPrimary(&$entity, $feature, $field_name) {
+
+    $label_term = 'rdfs:label';
+    $type_term = 'rdfs:type';
+    $description_term = 'schema:description';
+    $sequence_term = chado_get_semweb_term('feature', 'residues');
+    $seq_length_term = chado_get_semweb_term('feature', 'seqlen');
+    $seq_md5sum_term = chado_get_semweb_term('feature', 'md5checksum');
+
+    if ($feature->residues) {
+      // Underscores in the type name are shown as spaces. The space before
+      // "Sequence" keeps the type name from running into that word.
+      $type_label = preg_replace('/_/', ' ', $feature->type_id->name);
+      $entity->{$field_name}['und'][]['value'] = [
+        $label_term => 'Primary ' . $type_label . ' Sequence (' . number_format($feature->seqlen) . 'bp)',
+        $description_term => 'This is the primary representative sequence for this feature.',
+        $sequence_term => $feature->residues,
+        $seq_length_term => $feature->seqlen,
+        $seq_md5sum_term => $feature->md5checksum,
+        $type_term => $feature->type_id->name
+      ];
+    }
+  }
+
+
+  /**
+   * Adds the full length mRNA sequence built from the feature's sub features.
+   *
+   * The sequence is aggregated from the UTRs plus the exons. CDS features
+   * are used in place of exons when the mRNA has CDS children but no exons.
+   *
+   * @param $entity
+   *   The entity whose field values are appended to.
+   * @param $feature
+   *   The Chado feature record of the mRNA; its feature_id is used.
+   * @param $field_name
+   *   The name of the field on the entity that receives the values.
+   *
+   * @return
+   *   An array with the featureloc record of each sequence that was added.
+   */
+  private function addFLmRNA(&$entity, $feature, $field_name) {
+    $label_term = 'rdfs:label';
+    $type_term = 'rdfs:type';
+    $description_term = 'schema:description';
+    $sequence_term = chado_get_semweb_term('feature', 'residues');
+    $seq_coords_term = 'data:2012';
+    $seq_length_term = chado_get_semweb_term('feature', 'seqlen');
+    $seq_md5sum_term = chado_get_semweb_term('feature', 'md5checksum');
+
+    // Sometimes an mRNA may have only exons, only CDS or both exons and
+    // CDS.  We need to know which.
+    $sql = "
+      SELECT DISTINCT CVT.name
+      FROM {feature_relationship} FR
+        INNER JOIN {feature} SF on FR.subject_id = SF.feature_id
+        INNER JOIN {feature} OF on FR.object_id = OF.feature_id
+        INNER JOIN {cvterm} CVT on SF.type_id = CVT.cvterm_id
+      WHERE FR.object_id = :feature_id
+    ";
+    // fetchCol() takes a numeric column index, not a column name. The query
+    // selects a single column so the default index is the type name.
+    $subtypes = chado_query($sql, [':feature_id' => $feature->feature_id])->fetchCol();
+
+    $exon = 'exon';
+    if (!in_array('exon', $subtypes) and in_array('CDS', $subtypes)) {
+      $exon = 'CDS';
+    }
+
+    $options = [
+      'derive_from_parent' => 1,
+      'aggregate' => 1,
+      'is_html' => 0,
+      'sub_feature_types' => ['three_prime_UTR', $exon, 'five_prime_UTR'],
+    ];
+    $seqs = chado_get_feature_sequences(['feature_id' => $feature->feature_id], $options);
+    $featurelocs = [];
+    foreach ($seqs as $seq) {
+
+      $featureloc = $this->getFeatureLoc($seq['featureloc_id']);
+      $coords = $this->getSequenceCoords($featureloc);
+      // Underscores in the sub feature type names are shown as spaces.
+      $types = preg_replace('/_/', ' ', $seq['types']);
+
+      $entity->{$field_name}['und'][]['value'] = [
+        $sequence_term => $seq['residues'],
+        $label_term => 'Full Length mRNA Sequence (' . number_format(strlen($seq['residues'])) . 'bp)',
+        $description_term => 'This full length mRNA sequence was extracted from the reference sequence location at ' . $coords['schema:description'] . ' and contains: ' . implode(', ', $types) ,
+        $seq_coords_term => $coords,
+        $seq_length_term => strlen($seq['residues']),
+        $seq_md5sum_term => md5($seq['residues']),
+        $type_term => 'mRNA'
+      ];
+
+      $featurelocs[] = $featureloc;
+    }
+    return $featurelocs;
+  }
+
+  /**
+   * Retrieves a featureloc record along with the name of its source feature.
+   *
+   * Only the columns listed in the query are selected.
+   *
+   * @param $featureloc_id
+   *   The ID of the featureloc record to retrieve.
+   *
+   * @return
+   *   The result of fetchObject() on the query: an object with the name,
+   *   srcfeature_id, strand, fmin, fmax, phase and featureloc_id properties.
+   */
+  private function getFeatureLoc($featureloc_id) {
+    $featurelocs_sql = "
+        SELECT SRCF.name, FL.srcfeature_id, FL.strand, FL.fmin, FL.fmax, FL.phase, FL.featureloc_id
+        FROM {featureloc} FL
+          INNER JOIN {feature} SRCF on SRCF.feature_id = FL.srcfeature_id
+        WHERE FL.featureloc_id = :featureloc_id
+     ";
+    return chado_query($featurelocs_sql, [':featureloc_id' => $featureloc_id])->fetchObject();
+  }
+
+  /**
+   * Builds the sequence coordinates array for a featureloc record.
+   *
+   * @param $featureloc
+   *   An object with the name, strand, fmin and fmax properties.
+   *
+   * @return
+   *   An array holding the location string (name:fmin-fmax followed by the
+   *   strand symbol), the reference name, fmin plus one, fmax and the strand
+   *   symbol, each keyed by its term.
+   */
+  private function getSequenceCoords($featureloc) {
+    $reference = $featureloc->name;
+
+    // Only a strand of 1 or -1 gets a symbol; anything else is left blank.
+    $symbol = '';
+    if ($featureloc->strand == 1) {
+      $symbol = '+';
+    }
+    elseif ($featureloc->strand == -1) {
+      $symbol = '-';
+    }
+
+    // The fmin is incremented by one for display; fmax is used as stored.
+    $start = $featureloc->fmin + 1;
+    $stop = $featureloc->fmax;
+
+    $coords = [];
+    $coords['schema:description'] = $reference . ':' . $start . '-' . $stop . $symbol;
+    $coords['data:3002'] = $reference;
+    $coords[chado_get_semweb_term('featureloc', 'fmin')] = $start;
+    $coords[chado_get_semweb_term('featureloc', 'fmax')] = $stop;
+    $coords[chado_get_semweb_term('featureloc', 'strand')] = $symbol;
+
+    return $coords;
+  }
+
+  /**
+   * Adds the coding sequence for each location the feature is aligned to.
+   *
+   * @param $entity
+   *   The entity whose field values are appended to.
+   * @param $feature
+   *   The Chado feature record; its feature_id and name are used.
+   * @param $field_name
+   *   The name of the field on the entity that receives the values.
+   * @param $featurelocs
+   *   The featureloc records to build a CDS for; the srcfeature_id and
+   *   featureloc_id of each one are used.
+   */
+  private function addCDS(&$entity, $feature, $field_name, $featurelocs) {
+    $residues_key = chado_get_semweb_term('feature', 'residues');
+    $length_key = chado_get_semweb_term('feature', 'seqlen');
+    $md5_key = chado_get_semweb_term('feature', 'md5checksum');
+
+    // The same options are used for every location.
+    $cds_options = [
+      'derive_from_parent' => 1,
+      'aggregate' => 1,
+      'sub_feature_types' => ['CDS'],
+      'is_html' => 0,
+    ];
+
+    foreach ($featurelocs as $location) {
+      $found = chado_get_feature_sequences(
+        [
+          'feature_id' => $feature->feature_id,
+          'parent_id' => $location->srcfeature_id,
+          'name' => $feature->name,
+          'featureloc_id' => $location->featureloc_id,
+        ],
+        $cds_options
+      );
+      $coords = $this->getSequenceCoords($location);
+
+      if (count($found) == 0) {
+        continue;
+      }
+
+      // Only the first sequence returned is used.
+      $cds = $found[0];
+      $value = [];
+      $value['rdfs:label'] = 'Coding Sequence (' . number_format($cds['length']) . 'bp)';
+      $value[$residues_key] = $cds['residues'];
+      $value['schema:description'] = 'This CDS was extracted from the reference sequence location at ' . $coords['schema:description'] . '.';
+      $value[$length_key] = $cds['length'];
+      $value[$md5_key] = md5($cds['residues']);
+      $value['rdfs:type'] = 'CDS';
+
+      $entity->{$field_name}['und'][]['value'] = $value;
+    }
+  }
+
+  /**
+   * Adds the sequence of each gene that the feature is a part of.
+   *
+   * When the gene record has residues they are used directly. Otherwise the
+   * gene sequence is derived from the reference it is aligned to.
+   *
+   * @param $entity
+   *   The entity whose field values are appended to.
+   * @param $feature
+   *   The Chado feature record; its feature_id is used to find the genes.
+   * @param $field_name
+   *   The name of the field on the entity that receives the values.
+   */
+  private function addGeneParent(&$entity, $feature, $field_name) {
+    $label_term = 'rdfs:label';
+    $type_term = 'rdfs:type';
+    $seq_coords_term = 'data:2012';
+    $description_term = 'schema:description';
+    $sequence_term = chado_get_semweb_term('feature', 'residues');
+    $seq_length_term = chado_get_semweb_term('feature', 'seqlen');
+    $seq_md5sum_term = chado_get_semweb_term('feature', 'md5checksum');
+
+    $sql = "
+      SELECT FO.*
+      FROM {feature_relationship} FREL
+        INNER JOIN {feature} FO on FO.feature_id = FREL.object_id
+        INNER JOIN {cvterm} CVT on CVT.cvterm_id = FO.type_id
+        INNER JOIN {cvterm} RCVT on RCVT.cvterm_id = FREL.type_id
+      WHERE
+        FREL.subject_id = :feature_id and
+        CVT.name = 'gene' and
+        RCVT.name IN ('part_of')
+    ";
+    $genes = chado_query($sql, [':feature_id' => $feature->feature_id]);
+    while ($gene = $genes->fetchObject()) {
+      if (!empty($gene->residues)) {
+        // The gene has its own residues so no reference lookup is needed.
+        $entity->{$field_name}['und'][]['value'] = [
+          $label_term => 'Gene Sequence (primary)',
+          $sequence_term => $gene->residues,
+          $description_term => 'The gene sequence.',
+          $seq_length_term => strlen($gene->residues),
+          $seq_md5sum_term => md5($gene->residues),
+          $type_term => 'gene'
+        ];
+      }
+      else {
+        $seqs = chado_get_feature_sequences(['feature_id' => $gene->feature_id], ['derive_from_parent' => 1]);
+        foreach ($seqs as $seq) {
+          $featureloc = $this->getFeatureLoc($seq['featureloc_id']);
+          $coords = $this->getSequenceCoords($featureloc);
+          $entity->{$field_name}['und'][]['value'] = [
+            $label_term => 'Gene Sequence (' . number_format(strlen($seq['residues'])) . 'bp)',
+            $sequence_term => $seq['residues'],
+            $description_term => 'This gene sequence was extracted from the reference sequence location at ' . $coords['schema:description'] . '.' ,
+            $seq_coords_term => $coords,
+            $seq_length_term => strlen($seq['residues']),
+            $seq_md5sum_term => md5($seq['residues']),
+            $type_term => 'gene'
+          ];
+        }
+      }
+    }
+  }
+
+  /**
+   * Adds the sequence of each polypeptide related to the feature.
+   *
+   * Polypeptides are found through 'derives_from' and 'part_of'
+   * relationships and are added in order of relationship rank. Those without
+   * residues are skipped.
+   *
+   * @param $entity
+   *   The entity whose field values are appended to.
+   * @param $feature
+   *   The Chado feature record; its feature_id is used for the lookup.
+   * @param $field_name
+   *   The name of the field on the entity that receives the values.
+   */
+  private function addProtein(&$entity, $feature, $field_name) {
+    $residues_key = chado_get_semweb_term('feature', 'residues');
+    $length_key = chado_get_semweb_term('feature', 'seqlen');
+    $md5_key = chado_get_semweb_term('feature', 'md5checksum');
+
+    $sql = "
+      SELECT F.*
+      FROM {feature_relationship} FR
+        INNER JOIN {feature} F on FR.subject_id = F.feature_id
+        INNER JOIN {cvterm} CVT on CVT.cvterm_id = F.type_id
+        INNER JOIN {cvterm} RCVT on RCVT.cvterm_id = FR.type_id
+      WHERE
+        FR.object_id = :feature_id and
+        CVT.name = 'polypeptide' and
+        RCVT.name IN ('derives_from', 'part_of')
+      ORDER BY FR.rank ASC
+    ";
+    $results = chado_query($sql, [':feature_id' => $feature->feature_id]);
+    while ($polypeptide = $results->fetchObject()) {
+      if (empty($polypeptide->residues)) {
+        continue;
+      }
+
+      $residues = $polypeptide->residues;
+      $num_residues = strlen($residues);
+
+      $value = [];
+      $value['rdfs:label'] = 'Protein Sequence (' . number_format($num_residues) . 'aa)';
+      $value[$residues_key] = $residues;
+      $value['schema:description'] = 'The protein sequence.';
+      $value[$length_key] = $num_residues;
+      $value[$md5_key] = md5($residues);
+      $value['rdfs:type'] = 'polypeptide';
+
+      $entity->{$field_name}['und'][]['value'] = $value;
+    }
+  }
+}
+

+ 63 - 0
tripal_chado/includes/TripalFields/data__sequence_record/data__sequence_record_formatter.inc

@@ -0,0 +1,63 @@
+<?php
+
+class data__sequence_record_formatter extends ChadoFieldFormatter {
+
+  // The default label for this field.
+  public static $default_label = 'Sequences';
+
+  // The list of field types for which this formatter is appropriate.
+  public static $field_types = ['data__sequence_record'];
+
+  /**
+   * Renders each sequence as a titled item with wrapped residues.
+   *
+   * Each item with a value becomes an 'item' render element that uses the
+   * value's label as the title, its description as the description and the
+   * residues, wrapped inside a <pre> tag, as the markup. Items with an empty
+   * value are skipped. When nothing is left to show, empty markup is set.
+   *
+   * @param $element
+   *   The render array; index 0 is set by this function.
+   * @param $items
+   *   The field items to render.
+   *
+   * @see TripalFieldFormatter::view()
+   */
+  public function view(&$element, $entity_type, $entity, $langcode, $items, $display) {
+
+    // Only the terms that are read from the item values are looked up.
+    $label_term = 'rdfs:label';
+    $description_term = 'schema:description';
+    $sequence_term = chado_get_semweb_term('feature', 'residues');
+
+    // The number of residues to show on each line.
+    $num_bases = 50;
+
+    $content = [];
+
+    foreach ($items as $delta => $item) {
+      if (empty($item['value'])) {
+        continue;
+      }
+
+      $residues = '<pre class="residues-formatter">';
+      $residues .= wordwrap($item['value'][$sequence_term], $num_bases, "<br>", TRUE);
+      $residues .= '</pre>';
+
+      $content[] = [
+        '#type' => 'item',
+        '#title' => $item['value'][$label_term],
+        '#description' => $item['value'][$description_term],
+        '#markup' => $residues,
+      ];
+    }
+
+    if (empty($content)) {
+      $element[0] = [
+        '#type' => 'markup',
+        '#markup' => '',
+      ];
+      return;
+    }
+
+    $element[0] = $content;
+  }
+}

+ 26 - 0
tripal_chado/includes/TripalFields/data__sequence_record/data__sequence_record_widget.inc

@@ -0,0 +1,26 @@
+<?php
+
+class data__sequence_record_widget extends ChadoFieldWidget {
+
+  // The default label for this field.
+  public static $default_label = 'Sequences';
+
+  // The list of field types for which this widget is appropriate.
+  public static $field_types = ['data__sequence_record'];
+
+  /**
+   * Adds no form elements; the body of this method is empty.
+   *
+   * @see TripalFieldWidget::form()
+   */
+  public function form(&$widget, &$form, &$form_state, $langcode, $items, $delta, $element) {
+
+  }
+
+  /**
+   * Performs no validation; the body of this method is empty.
+   *
+   * @see TripalFieldWidget::validate()
+   */
+  public function validate($element, $form, &$form_state, $langcode, $delta) {
+
+  }
+}

+ 1 - 1
tripal_chado/includes/TripalFields/sbo__relationship/sbo__relationship.inc

@@ -524,7 +524,7 @@ class sbo__relationship extends ChadoField {
       // relationship type and the object and subject
       'include_fk' => [
         'type_id' => 1,
-        $object_id_key => [
+          $object_id_key => [
           'type_id' => 1,
           'organism_id' => 1,
         ],

+ 25 - 15
tripal_chado/includes/TripalFields/so__cds/so__cds.inc

@@ -72,24 +72,24 @@ class so__cds extends ChadoField {
       'value' => '',
     ];
 
-    $options = [
-      'return_array' => TRUE,
-      'order_by' => ['rank' => 'ASC'],
-    ];
-    $feature = chado_expand_var($feature, 'table', 'featureloc', $options);
-    $featurelocs = $feature->featureloc->feature_id;
-
-    // Verify that we have featurelocs before entering the loop
-    if (!is_array($featurelocs)) {
-      return;
-    }
-
-    foreach ($featurelocs as $featureloc) {
+    // Get the featureloc records that this feature is aligned to. We use
+    // this SQL rather than the chado_expand_var function because we don't
+    // want the residues included from the srcfeature_id which may be huge
+    // and overrun memory.
+    $featurelocs_sql = "
+      SELECT FL.featureloc_id, FL.srcfeature_id
+      FROM {featureloc} FL
+      WHERE FL.feature_id = :feature_id
+      ORDER BY rank ASC
+    ";
+    $aligned = chado_query($featurelocs_sql, [':feature_id' => $feature->feature_id]);
+    $index = 0;
+    while ($featureloc = $aligned->fetchObject()) {
       // Generate a CDS sequence if one exsits for this feature alignment.
       $cds_sequence = chado_get_feature_sequences(
         [
           'feature_id' => $feature->feature_id,
-          'parent_id' => $featureloc->srcfeature_id->feature_id,
+          'parent_id' => $featureloc->srcfeature_id,
           'name' => $feature->name,
           'featureloc_id' => $featureloc->featureloc_id,
         ],
@@ -111,7 +111,17 @@ class so__cds extends ChadoField {
         // that one mRNA is only aligned to a single location on the assembly so we
         // can access the CDS sequence with index 0.
         if ($cds_sequence[0]['residues']) {
-          $entity->{$field_name}['und'][$num_seqs++]['value'] = $cds_sequence[0]['residues'];
+          $entity->{$field_name}['und'][$num_seqs++] = [
+            'value' => $cds_sequence[0]['residues'],
+            // This field was incorrectly listed as a field in the featureprop
+            // table, but really it is a derived field. So, we have to do this
+            // hacky fix to get around the problem.
+            'chado-featureprop__featureprop_id' => NULL,
+            'chado-featureprop__feature_id' => NULL,
+            'chado-featureprop__value' => NULL,
+            'chado-featureprop__type_id' => NULL,
+            'chado-featureprop__rank' => NULL
+          ];
         }
       }
     }

+ 3 - 4
tripal_chado/includes/TripalFields/so__cds/so__cds_widget.inc

@@ -15,16 +15,15 @@ class so__cds_widget extends ChadoFieldWidget {
   public function form(&$widget, &$form, &$form_state, $langcode, $items, $delta, $element) {
     parent::form($widget, $form, $form_state, $langcode, $items, $delta, $element);
 
-    // TODO: add a widget...
   }
 
 
+
+
   /**
    *
    * @see TripalFieldWidget::validate()
    */
-  public function submit($form, &$form_state, $entity_type, $entity, $langcode, $delta) {
-    $field_name = $this->field['field_name'];
-
+  public function validate($element, $form, &$form_state, $langcode, $delta) {
   }
 }

+ 44 - 0
tripal_chado/includes/tripal_chado.fields.inc

@@ -380,6 +380,18 @@ function tripal_chado_bundle_fields_info_custom(&$info, $details, $entity_type,
         'type' => 'field_chado_storage',
       ],
     ];
+
+    $field_name = 'data__sequence_record';
+    $field_type = 'data__sequence_record';
+    $info[$field_name] = [
+      'field_name' => $field_name,
+      'type' => $field_type,
+      'cardinality' => FIELD_CARDINALITY_UNLIMITED,
+      'locked' => FALSE,
+      'storage' => [
+        'type' => 'field_chado_storage',
+      ],
+    ];
   }
 
   // FEATURE SEQLEN
@@ -1703,6 +1715,38 @@ function tripal_chado_bundle_instances_info_custom(&$info, $entity_type, $bundle
         ],
       ],
     ];
+
+    $field_name = 'data__sequence_record';
+    $info[$field_name] = [
+      'field_name' => $field_name,
+      'entity_type' => $entity_type,
+      'bundle' => $bundle->name,
+      'label' => 'Sequences',
+      'description' => 'A molecular sequence and associated metadata.',
+      'required' => FALSE,
+      'settings' => [
+        'auto_attach' => FALSE,
+        'chado_table' => $table_name,
+        'chado_column' => 'residues',
+        'base_table' => $table_name,
+        'term_accession' => '0849',
+        'term_vocabulary' => 'data',
+        'term_name' => 'Sequence record',
+      ],
+      'widget' => [
+        'type' => 'data__sequence_record_widget',
+        'settings' => [
+          'display_label' => 1,
+        ],
+      ],
+      'display' => [
+        'default' => [
+          'label' => 'above',
+          'type' => 'data__sequence_record_formatter',
+          'settings' => [],
+        ],
+      ],
+    ];
   }
 
   // FEATURE SEQLEN

+ 8 - 1
tripal_chado/includes/tripal_chado.semweb.inc

@@ -566,10 +566,17 @@ function tripal_chado_populate_vocab_EDAM() {
     'id' => 'data:2044',
     'name' => 'Sequence',
     'cv_name' => 'EDAM',
-    'definition' => 'One or more molecular sequences, possibly with associated annotation..',
+    'definition' => 'One or more molecular sequences, possibly with associated annotation.',
   ]);
   chado_associate_semweb_term('feature', 'residues', $term);
 
+  $term = chado_insert_cvterm([
+    'id' => 'data:0849',
+    'name' => 'Sequence record',
+    'cv_name' => 'EDAM',
+    'definition' => 'A molecular sequence and associated metadata.',
+  ]);
+
   $term = chado_insert_cvterm([
     'id' => 'data:0842',
     'name' => 'Identifier',

+ 1 - 1
tripal_chado/theme/css/tripal_chado.css

@@ -36,7 +36,7 @@
 
 .residues-formatter {
   color: black;
-  height: 100px;
+  height: 200px;
   max-width: 500px;
   overflow: scroll;
   white-space: normal;

+ 18 - 0
tripal_chado/tripal_chado.install

@@ -2074,4 +2074,22 @@ function tripal_chado_update_7338() {
     throw new DrupalUpdateException('Could not perform update: '. $error);
   }
 
+}
+
+/**
+ * Adds the "Sequence record" cvterm for the data__sequence_record field.
+ */
+function tripal_chado_update_7339() {
+  try {
+    $term = chado_insert_cvterm([
+      'id' => 'data:0849',
+      'name' => 'Sequence record',
+      'cv_name' => 'EDAM',
+      'definition' => 'A molecular sequence and associated metadata.',
+    ]);
+    drupal_set_message("\n\nNOTE: This update includes a new \"Sequences\" field. It compiles both primary sequences and sequences extracted from the reference into a single list. For mRNA it provides gene, full length mRNA, CDS and protein sequences (if available). Please consider using this new field and disabling other sequence fields.\n\n");
+  } catch (\PDOException $e) {
+    $error = $e->getMessage();
+    throw new DrupalUpdateException('Could not perform update: '. $error);
+  }
 }

+ 17 - 8
tripal_ds/includes/tripal_ds.ds.inc

@@ -72,28 +72,37 @@ function _ds_layout_settings_info($bundle_name, $instances) {
         $instance_base_table = array_key_exists('base_table', $instance['settings']) ? $instance['settings']['base_table'] : '';
         $instance_base_chado = array_key_exists('chado_table', $instance['settings']) ? $instance['settings']['chado_table'] : '';
         $prop_table = strpos($instance_base_chado, 'prop');
+        $data_sequence_record = strpos($instance_name, 'data__sequence_record');
         $data_sequence = strpos($instance_name, 'data__sequence');
         if ($instance_base_chado && $instance_base_table) {
 
           if ($instance_base_chado == $instance_base_table) {
-            if ($prop_table !== FALSE) {
-              array_push($prop_fields, $instance_name);
-            }
-            elseif ($data_sequence !== FALSE) {
+            if ($data_sequence_record !== FALSE) {
               array_push($data_sequence_fields, $instance_name);
             }
+            elseif ($data_sequence !== FALSE or $instance_name == 'so__cds' or $instance_name == 'data__protein_sequence') {
+              // Do nothing as these chado column sequence fields
+              // should be hidden by default.
+            }
+            elseif ($prop_table !== FALSE) {
+              array_push($prop_fields, $instance_name);
+            }
             else {
               array_push($summary_fields, $instance_name);
             }
 
           }
           elseif ($instance_base_chado != $instance_base_table) {
-            if ($prop_table !== FALSE) {
-              array_push($prop_fields, $instance_name);
-            }
-            elseif ($data_sequence !== FALSE) {
+            if ($data_sequence_record !== FALSE) {
               array_push($data_sequence_fields, $instance_name);
             }
+            elseif ($data_sequence !== FALSE or $instance_name == 'so__cds' or $instance_name == 'data__protein_sequence') {
+              // Do nothing as these chado column sequence fields
+              // should be hidden by default.
+            }
+            elseif ($prop_table !== FALSE) {
+              array_push($prop_fields, $instance_name);
+            }
             else {
               array_push($all_other_fields, $instance);