Browse Source

Fixed both issues described in #1135. Also made a few additional fixes for issues with PHP 7.4, and adjusted the default layouts for sequences

Stephen Ficklin 4 years ago
parent
commit
2253f7ee3c

+ 7 - 5
tripal/includes/tripal.fields.inc

@@ -911,11 +911,13 @@ function tripal_form_field_ui_display_overview_form_alter(&$form, &$form_state,
   $fields_names = element_children($form['fields']);
   $fields_names = element_children($form['fields']);
   foreach ($fields_names as $field_name) {
   foreach ($fields_names as $field_name) {
     $field_info = field_info_field($field_name);
     $field_info = field_info_field($field_name);
-    if ($field_info['type'] == 'kvproperty_adder') {
-      unset($form['fields'][$field_name]);
-    }
-    if ($field_info['type'] == 'cvterm_class_adder') {
-      unset($form['fields'][$field_name]);
+    if ($field_info) {
+      if ($field_info['type'] == 'kvproperty_adder') {
+        unset($form['fields'][$field_name]);
+      }
+      if ($field_info['type'] == 'cvterm_class_adder') {
+        unset($form['fields'][$field_name]);
+      }
     }
     }
   }
   }
 }
 }

+ 1 - 7
tripal_chado/includes/TripalFields/data__protein_sequence/data__protein_sequence.inc

@@ -93,18 +93,12 @@ class data__protein_sequence extends ChadoField {
       WHERE
       WHERE
         FR.object_id = :feature_id and
         FR.object_id = :feature_id and
         CVT.name = 'polypeptide' and
         CVT.name = 'polypeptide' and
-        RCVT.name  IN ('derives_from', 'part_of')
+        RCVT.name IN ('derives_from', 'part_of')
       ORDER BY FR.rank ASC
       ORDER BY FR.rank ASC
     ";
     ";
     $proteins = chado_query($sql, [':feature_id' => $feature->feature_id]);
     $proteins = chado_query($sql, [':feature_id' => $feature->feature_id]);
     while ($protein = $proteins->fetchObject()) {
     while ($protein = $proteins->fetchObject()) {
       $entity->{$field_name}['und'][$num_seqs]['value'] = $protein->residues;
       $entity->{$field_name}['und'][$num_seqs]['value'] = $protein->residues;
-      // Because we'll be saving a feature we need to maintain all of it's
-      // columns in the feature table. The following will add them all.
-      $columns = get_object_vars($protein);
-      foreach ($columns as $colname => $value) {
-        $entity->{$field_name}['und'][$num_seqs]['chado-feature__' . $colname] = $value;
-      }
       $num_seqs++;
       $num_seqs++;
     }
     }
   }
   }

+ 2 - 1
tripal_chado/includes/TripalFields/data__protein_sequence/data__protein_sequence_formatter.inc

@@ -12,7 +12,8 @@ class data__protein_sequence_formatter extends ChadoFieldFormatter {
    * @see TripalFieldFormatter::view()
    * @see TripalFieldFormatter::view()
    */
    */
   public function view(&$element, $entity_type, $entity, $langcode, $items, $display) {
   public function view(&$element, $entity_type, $entity, $langcode, $items, $display) {
-    $content = 'There is no protein sequence.';
+
+    $content = 'There is no protein sequence available.';
     if (count($items) > 0 and $items[0]['value']) {
     if (count($items) > 0 and $items[0]['value']) {
       $num_bases = 50;
       $num_bases = 50;
       $content = '<pre class="protein-residues-formatter">';
       $content = '<pre class="protein-residues-formatter">';

+ 1 - 98
tripal_chado/includes/TripalFields/data__sequence/data__sequence.inc

@@ -15,7 +15,7 @@ class data__sequence extends ChadoField {
   public static $default_label = 'Sequence';
   public static $default_label = 'Sequence';
 
 
   // The default description for this field.
   // The default description for this field.
-  public static $description = 'A field for managing nucleotide and protein residues.';
+  public static $description = 'A field for managing the primary sequence for a feature.';
 
 
   // Provide a list of instance specific settings. These can be accessed within
   // Provide a list of instance specific settings. These can be accessed within
   // the instanceSettingsForm.  When the instanceSettingsForm is submitted
   // the instanceSettingsForm.  When the instanceSettingsForm is submitted
@@ -77,103 +77,6 @@ class data__sequence extends ChadoField {
 
 
     $feature = chado_expand_var($feature, 'field', 'feature.residues');
     $feature = chado_expand_var($feature, 'field', 'feature.residues');
     $entity->{$field_name}['und'][0]['value'] = $feature->residues;
     $entity->{$field_name}['und'][0]['value'] = $feature->residues;
-
-    /* // Add in sequences from alignments.
-       $options = array(
-         'return_array' => 1,
-         'include_fk' => array(
-           'srcfeature_id' => array(
-             'type_id' => 1
-           ),
-           'feature_id' => array(
-             'type_id' => 1
-           ),
-         ),
-       );
-       $feature = chado_expand_var($feature, 'table', 'featureloc', $options);
-       $featureloc_sequences = $this->get_featureloc_sequences($feature->feature_id, $feature->featureloc->feature_id);
-   
-       // Add in the coding sequences. It's faster to provide the SQL rather than
-       // to use chado_generate_var based on the type.
-       $sql = "
-         SELECT F.*
-         FROM {feature_relationship} FR
-           INNER JOIN {feature} F on FR.subject_id = F.feature_id
-           INNER JOIN {cvterm} CVT on CVT.cvterm_id = F.type_id
-           INNER JOIN {cvterm} RCVT on RCVT.cvterm_id = FR.type_id
-           INNER JOIN {featureloc} FL on FL.feature_id = F.feature_id
-         WHERE
-           FR.object_id = :feature_id and
-           CVT.name = 'CDS' and
-           RCVT.name = 'part_of'
-         ORDER BY FR.rank ASC
-       ";
-       $results = chado_query($sql, array(':feature_id' => $feature->feature_id));
-       $coding_seq = '';
-       while ($CDS = $results->fetchObject()) {
-         if ($CDS->residues) {
-           $coding_seq .= $CDS->residues;
-         }
-       }
-       if ($coding_seq) {
-         $entity->{$field_name}['und'][$num_seqs++]['value'] = array(
-           '@type' => 'SO:0000316',
-           'type' => 'coding_sequence',
-           'label' => 'Coding sequence (CDS)',
-           'defline' => chado_get_fasta_defline($feature, 'CDS', NULL, '', strlen($coding_seq)),
-           'residues' => $coding_seq,
-         );
-       }
-   
-       foreach($featureloc_sequences as $src => $attrs){
-         // the $attrs array has the following keys
-         //   * id:  a unique identifier combining the feature id with the cvterm id
-         //   * type: the type of sequence (e.g. mRNA, etc)
-         //   * location:  the alignment location
-         //   * defline: the definition line
-         //   * formatted_seq: the formatted sequences
-         //   * featureloc:  the feature object aligned to
-         $entity->{$field_name}['und'][$num_seqs++]['value'] = array(
-           'residues' => $attrs['residues'],
-           '@type' => 'SO:0000110',
-           'type' => 'sequence_feature',
-           'defline' => chado_get_fasta_defline($feature, '', $attrs['featureloc'], 'CDS', strlen($attrs['residues'])),
-           'label' => 'Sequence from alignment at ' . $attrs['location'],
-         );
-   
-   
-         // check to see if this alignment has any CDS. If so, generate a CDS sequence
-         $cds_sequence = chado_get_feature_sequences(
-             array(
-               'feature_id' => $feature->feature_id,
-               'parent_id' => $attrs['featureloc']->srcfeature_id->feature_id,
-               'name' => $feature->name,
-               'featureloc_id' => $attrs['featureloc']->featureloc_id,
-             ),
-             array(
-               'derive_from_parent' => 1, // CDS are in parent-child relationships so we want to use the sequence from the parent
-               'aggregate' => 1, // we want to combine all CDS for this feature into a single sequence
-               'sub_feature_types' => array('CDS'), // we're looking for CDS features
-               'is_html' => 0
-             )
-             );
-   
-         if (count($cds_sequence) > 0) {
-           // the chado_get_feature_sequences() function can return multiple sequences
-           // if a feature is aligned to multiple places. In the case of CDSs we expect
-           // that one mRNA is only aligned to a single location on the assembly so we
-           // can access the CDS sequence with index 0.
-           if ($cds_sequence[0]['residues']) {
-             $entity->{$field_name}['und'][$num_seqs++]['value'] = array(
-               'residues' => $cds_sequence[0]['residues'],
-               '@type' => 'SO:0000316',
-               'type' => 'coding_sequence',
-               'defline' => chado_get_fasta_defline($feature, '', $attrs['featureloc'], 'CDS', $cds_sequence[0]['length']),
-               'label' => 'Coding sequence (CDS) from alignment at  ' . $attrs['location'],
-             );
-           }
-         }
-       } */
   }
   }
 }
 }
 
 

+ 40 - 36
tripal_chado/includes/TripalFields/data__sequence_coordinates/data__sequence_coordinates.inc

@@ -224,49 +224,53 @@ class data__sequence_coordinates extends ChadoField {
     $strand_term = chado_get_semweb_term('featureloc', 'strand');
     $strand_term = chado_get_semweb_term('featureloc', 'strand');
     $phase_term = chado_get_semweb_term('featureloc', 'phase');
     $phase_term = chado_get_semweb_term('featureloc', 'phase');
 
 
-    $options = [
-      'return_array' => TRUE,
-      'order_by' => ['rank' => 'ASC'],
-    ];
-    $feature = chado_expand_var($feature, 'table', 'featureloc', $options);
-
     // Set some defauls for the empty record
     // Set some defauls for the empty record
     $entity->{$field_name}['und'][0] = [
     $entity->{$field_name}['und'][0] = [
       'value' => '',
       'value' => '',
     ];
     ];
 
 
-    // Get the featureloc records that this feature is aligned to.
-    $aligned = $feature->featureloc->feature_id;
-    if ($aligned) {
-      foreach ($aligned as $index => $featureloc) {
-        $srcfeature = $featureloc->srcfeature_id->name;
+    // Get the featureloc records that this feature is aligned to. We use
+    // this SQL rather than the chado_expand_var function because we don't
+    // want the residues included from the srcfeature_id which may be huge
+    // and overrun memory.
+    $featurelocs_sql = "
+      SELECT SRCF.name, FL.srcfeature_id, FL.strand, FL.fmin, FL.fmax, FL,phase
+      FROM {featureloc} FL
+        INNER JOIN {feature} SRCF on SRCF.feature_id = FL.srcfeature_id
+      WHERE FL.feature_id = :feature_id
+      ORDER BY rank ASC
+    ";
+    $aligned = chado_query($featurelocs_sql, [':feature_id' => $feature->feature_id]);
+    $index = 0;
+    while ($featureloc = $aligned->fetchObject()) {
+      $srcfeature = $featureloc->name;
+      $strand = '';
+      if ($featureloc->strand == 1) {
+        $strand = '+';
+      }
+      elseif ($featureloc->strand == -1) {
+        $strand = '-';
+      }
+      else {
         $strand = '';
         $strand = '';
-        if ($featureloc->strand == 1) {
-          $strand = '+';
-        }
-        elseif ($featureloc->strand == -1) {
-          $strand = '-';
-        }
-        else {
-          $strand = '';
-        }
-        $fmin = $featureloc->fmin + 1;
-        $fmax = $featureloc->fmax;
-        $entity->{$field_name}['und'][$index] = [
-          'value' => [
-            $description => $srcfeature . ':' . $fmin . '-' . $fmax . $strand,
-            $reference_term => $srcfeature,
-            $fmin_term => $fmin,
-            $fmax_term => $fmax,
-            $strand_term => $strand,
-            $phase_term => $featureloc->phase,
-          ],
-        ];
-        $sentity_id = chado_get_record_entity_by_table('feature_id', $featureloc->srcfeature_id->feature_id);
-        if ($sentity_id) {
-          $entity->{$field_name}['und'][0]['value']['entity'] = 'TripalEntity:' . $sentity_id;
-        }
       }
       }
+      $fmin = $featureloc->fmin + 1;
+      $fmax = $featureloc->fmax;
+      $entity->{$field_name}['und'][$index] = [
+        'value' => [
+          $description => $srcfeature . ':' . $fmin . '-' . $fmax . $strand,
+          $reference_term => $srcfeature,
+          $fmin_term => $fmin,
+          $fmax_term => $fmax,
+          $strand_term => $strand,
+          $phase_term => $featureloc->phase,
+        ],
+      ];
+      $sentity_id = chado_get_record_entity_by_table('feature_id', $featureloc->srcfeature_id);
+      if ($sentity_id) {
+        $entity->{$field_name}['und'][0]['value']['entity'] = 'TripalEntity:' . $sentity_id;
+      }
+      $index++;
     }
     }
   }
   }
 }
 }

+ 421 - 0
tripal_chado/includes/TripalFields/data__sequence_record/data__sequence_record.inc

@@ -0,0 +1,421 @@
+<?php
+
+class data__sequence_record extends ChadoField {
+
+
+  // --------------------------------------------------------------------------
+  //                     EDITABLE STATIC CONSTANTS
+  //
+  // The following constants SHOULD be set for each descendent class.  They are
+  // used by the static functions to provide information to Drupal about
+  // the field and its default widget and formatter.
+  // --------------------------------------------------------------------------
+
+  // The default label for this field.
+  public static $default_label = 'Sequences';
+
+  // The default description for this field.
+  public static $description = 'A field for displaying all sequences associated with a feature along with their metadata.';
+
+  // Provide a list of instance specific settings. These can be accessed within
+  // the instanceSettingsForm.  When the instanceSettingsForm is submitted
+  // then Drupal will automatically change these settings for the instance.
+  // It is recommended to put settings at the instance level whenever possible.
+  // If you override this variable in a child class be sure to replicate the
+  // term_name, term_vocab, term_accession and term_fixed keys as these are
+  // required for all TripalFields.
+  public static $default_instance_settings = [
+    // The short name for the vocabulary (e.g. schema, SO, GO, PATO, etc.).
+    'term_vocabulary' => 'data',
+    // The name of the term.
+    'term_name' => 'sequence_record',
+    // The unique ID (i.e. accession) of the term.
+    'term_accession' => '0849',
+    // Set to TRUE if the site admin is allowed to change the term
+    // type. This will create form elements when editing the field instance
+    // to allow the site admin to change the term settings above.
+    'term_fixed' => FALSE,
+  ];
+
+  // Indicates the download formats for this field.  The list must be the
+  // name of a child class of the TripalFieldDownloader.
+  public static $download_formatters = [
+    'TripalTabDownloader',
+    'TripalCSVDownloader',
+    'TripalNucFASTADownloader',
+  ];
+
+  // The default widget for this field.
+  public static $default_widget = 'data__sequence_record_widget';
+
+  // The default formatter for this field.
+  public static $default_formatter = 'data__sequence_record_formatter';
+
+
+  /**
+   * Describes this field's element, keyed by the field's term ID.
+   *
+   * The element is declared as a read-only complex type that is neither
+   * sortable nor searchable and supports no operations.
+   *
+   * @see TripalField::elementInfo()
+   */
+  public function elementInfo() {
+    $info = [
+      'operations' => [],
+      'sortable' => FALSE,
+      'searchable' => FALSE,
+      'type' => 'xs:complex',
+      'readonly' => TRUE,
+    ];
+
+    return [$this->getFieldTermID() => $info];
+  }
+
+  /**
+   * Populates this field with the sequences related to the entity's feature.
+   *
+   * The primary sequence is always added. An mRNA feature additionally gets
+   * its parent gene and full length mRNA sequences and, when the mRNA has at
+   * least one reference location, its CDS and protein sequences. Every other
+   * feature type gets the sequence derived from its reference location.
+   *
+   * @see TripalField::load()
+   */
+  public function load($entity) {
+    $field_name = $this->field['field_name'];
+
+    // Expand the residues column of the Chado feature record so the primary
+    // sequence is available on the feature object.
+    $feature = chado_expand_var($entity->chado_record, 'field', 'feature.residues');
+
+    $this->addPrimary($entity, $feature, $field_name);
+
+    // Anything that is not an mRNA only needs the reference-derived sequence.
+    if ($feature->type_id->name != 'mRNA') {
+      $this->addGenericReference($entity, $feature, $field_name);
+      return;
+    }
+
+    $this->addGeneParent($entity, $feature, $field_name);
+    $featurelocs = $this->addFLmRNA($entity, $feature, $field_name);
+
+    // The CDS and protein are only added when the mRNA yielded a location.
+    if (count($featurelocs) > 0) {
+      $this->addCDS($entity, $feature, $field_name, $featurelocs);
+      $this->addProtein($entity, $feature, $field_name);
+    }
+  }
+
+  /**
+   * Adds the feature's sequence as derived from each reference location.
+   *
+   * @param object $entity
+   *   The entity being loaded. One item is appended to the field for every
+   *   sequence returned by chado_get_feature_sequences().
+   * @param object $feature
+   *   The Chado feature record of the entity.
+   * @param string $field_name
+   *   The machine name of this field.
+   *
+   * @return array
+   *   The featureloc records, as returned by getFeatureLoc(), for each
+   *   sequence that was added.
+   */
+  private function addGenericReference(&$entity, $feature, $field_name) {
+    $label_term = 'rdfs:label';
+    $type_term = 'rdfs:type';
+    $description_term = 'schema:description';
+    $sequence_term = chado_get_semweb_term('feature', 'residues');
+    $seq_coords_term = 'data:2012';
+    $seq_length_term = chado_get_semweb_term('feature', 'seqlen');
+    $seq_md5sum_term = chado_get_semweb_term('feature', 'md5checksum');
+
+    $seqs = chado_get_feature_sequences(
+      ['feature_id' => $feature->feature_id],
+      ['derive_from_parent' => 1]
+    );
+
+    $locations = [];
+    foreach ($seqs as $seq) {
+      $location = $this->getFeatureLoc($seq['featureloc_id']);
+      $coords = $this->getSequenceCoords($location);
+
+      $residues = $seq['residues'];
+      $length = strlen($residues);
+      $type_name = $feature->type_id->name;
+      // Underscores in the type name become spaces for the label.
+      $readable_type = ucfirst(preg_replace('/_/', ' ', $type_name));
+
+      $entity->{$field_name}['und'][]['value'] = [
+        $sequence_term => $residues,
+        $label_term => $readable_type . ' Sequence (' . number_format($length) . 'bp)',
+        $description_term => 'This sequence was extracted from the reference sequence location at ' . $coords['schema:description'] . '.',
+        $seq_coords_term => $coords,
+        $seq_length_term => $length,
+        $seq_md5sum_term => md5($residues),
+        $type_term => $type_name,
+      ];
+
+      $locations[] = $location;
+    }
+
+    return $locations;
+  }
+
+  /**
+   * Adds the feature's own residues as the primary sequence.
+   *
+   * Nothing is added when the feature has no residues.
+   *
+   * @param object $entity
+   *   The entity being loaded. One item is appended to the field.
+   * @param object $feature
+   *   The Chado feature record of the entity, with residues expanded.
+   * @param string $field_name
+   *   The machine name of this field.
+   */
+  private function addPrimary(&$entity, $feature, $field_name) {
+
+    $label_term = 'rdfs:label';
+    $type_term = 'rdfs:type';
+    $description_term = 'schema:description';
+    $sequence_term = chado_get_semweb_term('feature', 'residues');
+    $seq_length_term = chado_get_semweb_term('feature', 'seqlen');
+    $seq_md5sum_term = chado_get_semweb_term('feature', 'md5checksum');
+
+    if ($feature->residues) {
+      // Underscores in the type name become spaces for the label. The space
+      // before "Sequence" keeps the type name from running into that word.
+      $readable_type = preg_replace('/_/', ' ', $feature->type_id->name);
+
+      $entity->{$field_name}['und'][]['value'] = [
+        $label_term => 'Primary ' . $readable_type . ' Sequence (' . number_format($feature->seqlen) . 'bp)',
+        $description_term => 'This is the primary representative sequence for this feature.',
+        $sequence_term => $feature->residues,
+        $seq_length_term => $feature->seqlen,
+        $seq_md5sum_term => $feature->md5checksum,
+        $type_term => $feature->type_id->name
+      ];
+    }
+  }
+
+
+  /**
+   * Adds the full length mRNA sequence for each reference location.
+   *
+   * The sequence is aggregated from the mRNA's UTR and exon sub features. When
+   * the mRNA has CDS sub features but no exons, the CDS features are used in
+   * place of the exons.
+   *
+   * @param object $entity
+   *   The entity being loaded. One item is appended to the field for every
+   *   sequence returned by chado_get_feature_sequences().
+   * @param object $feature
+   *   The Chado feature record of the mRNA.
+   * @param string $field_name
+   *   The machine name of this field.
+   *
+   * @return array
+   *   The featureloc records, as returned by getFeatureLoc(), for each
+   *   sequence that was added.
+   */
+  private function addFLmRNA(&$entity, $feature, $field_name) {
+    $label_term = 'rdfs:label';
+    $type_term = 'rdfs:type';
+    $description_term = 'schema:description';
+    $sequence_term = chado_get_semweb_term('feature', 'residues');
+    $seq_coords_term = 'data:2012';
+    $seq_length_term = chado_get_semweb_term('feature', 'seqlen');
+    $seq_md5sum_term = chado_get_semweb_term('feature', 'md5checksum');
+
+    // Sometimes an mRNA may have only exons, only CDS or both exons and
+    // CDS.  We need to know which.
+    $sql = "
+      SELECT DISTINCT CVT.name
+      FROM {feature_relationship} FR
+        INNER JOIN {feature} SF on FR.subject_id = SF.feature_id
+        INNER JOIN {feature} OF on FR.object_id = OF.feature_id
+        INNER JOIN {cvterm} CVT on SF.type_id = CVT.cvterm_id
+      WHERE FR.object_id = :feature_id
+    ";
+    // fetchCol() takes a numeric column index rather than a column name. The
+    // query selects a single column so the default index of 0 is the name.
+    $subtypes = chado_query($sql, [':feature_id' => $feature->feature_id])->fetchCol();
+
+    $exon = 'exon';
+    if (!in_array('exon', $subtypes, TRUE) and in_array('CDS', $subtypes, TRUE)) {
+      $exon = 'CDS';
+    }
+
+    $options = [
+      'derive_from_parent' => 1,
+      'aggregate' => 1,
+      'is_html' => 0,
+      'sub_feature_types' => ['three_prime_UTR', $exon, 'five_prime_UTR'],
+    ];
+    $seqs = chado_get_feature_sequences(['feature_id' => $feature->feature_id], $options);
+    $featurelocs = [];
+    foreach ($seqs as $seq) {
+
+      $featureloc = $this->getFeatureLoc($seq['featureloc_id']);
+      $coords = $this->getSequenceCoords($featureloc);
+      // Underscores in the sub feature type names become spaces for display.
+      $types = preg_replace('/_/', ' ', $seq['types']);
+
+      $entity->{$field_name}['und'][]['value'] = [
+        $sequence_term => $seq['residues'],
+        $label_term => 'Full Length mRNA Sequence (' . number_format(strlen($seq['residues'])) . 'bp)',
+        $description_term => 'This full length mRNA sequence was extracted from the reference sequence location at ' . $coords['schema:description'] . ' and contains: ' . implode(', ', $types) ,
+        $seq_coords_term => $coords,
+        $seq_length_term => strlen($seq['residues']),
+        $seq_md5sum_term => md5($seq['residues']),
+        $type_term => 'mRNA'
+      ];
+
+      $featurelocs[] = $featureloc;
+    }
+    return $featurelocs;
+  }
+
+  /**
+   * Retrieves a featureloc record along with the name of its source feature.
+   *
+   * Only the location columns and the source feature name are selected.
+   *
+   * @param int $featureloc_id
+   *   The ID of the featureloc record to retrieve.
+   *
+   * @return object|false
+   *   The first row of the result as returned by fetchObject(), with the
+   *   properties name, srcfeature_id, strand, fmin, fmax, phase and
+   *   featureloc_id. Presumably FALSE when no record matches -- confirm
+   *   against the database layer.
+   */
+  private function getFeatureLoc($featureloc_id) {
+    $featurelocs_sql = "
+        SELECT SRCF.name, FL.srcfeature_id, FL.strand, FL.fmin, FL.fmax, FL.phase, FL.featureloc_id
+        FROM {featureloc} FL
+          INNER JOIN {feature} SRCF on SRCF.feature_id = FL.srcfeature_id
+        WHERE FL.featureloc_id = :featureloc_id
+     ";
+    return chado_query($featurelocs_sql, [':featureloc_id' => $featureloc_id])->fetchObject();
+  }
+
+  /**
+   * Builds the coordinate values for a featureloc record.
+   *
+   * @param object $featureloc
+   *   A record with the properties name, strand, fmin and fmax, such as the
+   *   one returned by getFeatureLoc().
+   *
+   * @return array
+   *   The location string, reference name, start, end and strand symbol, each
+   *   keyed by its controlled vocabulary term.
+   */
+  private function getSequenceCoords($featureloc) {
+    $description_term = 'schema:description';
+    $reference_term = 'data:3002';
+    $fmin_term = chado_get_semweb_term('featureloc', 'fmin');
+    $fmax_term = chado_get_semweb_term('featureloc', 'fmax');
+    $strand_term = chado_get_semweb_term('featureloc', 'strand');
+
+    // Translate the numeric strand into its symbol. Any value other than
+    // 1 or -1 has no symbol.
+    $strand = '';
+    if ($featureloc->strand == 1) {
+      $strand = '+';
+    }
+    elseif ($featureloc->strand == -1) {
+      $strand = '-';
+    }
+
+    $reference = $featureloc->name;
+    // NOTE(review): the start is fmin plus one, presumably to convert a
+    // 0-based fmin into a 1-based start -- confirm.
+    $start = $featureloc->fmin + 1;
+    $end = $featureloc->fmax;
+
+    return [
+      $description_term => $reference . ':' . $start . '-' . $end . $strand,
+      $reference_term => $reference,
+      $fmin_term => $start,
+      $fmax_term => $end,
+      $strand_term => $strand,
+    ];
+  }
+
+  /**
+   * Adds the coding sequence of the feature for each reference location.
+   *
+   * @param object $entity
+   *   The entity being loaded. At most one item is appended to the field per
+   *   location.
+   * @param object $feature
+   *   The Chado feature record of the entity.
+   * @param string $field_name
+   *   The machine name of this field.
+   * @param array $featurelocs
+   *   The featureloc records, as returned by getFeatureLoc(), to build a
+   *   coding sequence for.
+   */
+  private function addCDS(&$entity, $feature, $field_name, $featurelocs) {
+    $label_term = 'rdfs:label';
+    $type_term = 'rdfs:type';
+    $description_term = 'schema:description';
+    $sequence_term = chado_get_semweb_term('feature', 'residues');
+    $seq_length_term = chado_get_semweb_term('feature', 'seqlen');
+    $seq_md5sum_term = chado_get_semweb_term('feature', 'md5checksum');
+
+    foreach ($featurelocs as $location) {
+      // Aggregate all CDS sub features at this location into one sequence
+      // derived from the parent.
+      $found = chado_get_feature_sequences(
+        [
+          'feature_id' => $feature->feature_id,
+          'parent_id' => $location->srcfeature_id,
+          'name' => $feature->name,
+          'featureloc_id' => $location->featureloc_id,
+        ],
+        [
+          'derive_from_parent' => 1,
+          'aggregate' => 1,
+          'sub_feature_types' => ['CDS'],
+          'is_html' => 0,
+        ]
+      );
+      $coords = $this->getSequenceCoords($location);
+
+      if (count($found) == 0) {
+        continue;
+      }
+
+      // Only the first sequence returned for the location is used.
+      $cds = $found[0];
+      $entity->{$field_name}['und'][]['value'] = [
+        $label_term => 'Coding Sequence (' . number_format($cds['length']) . 'bp)',
+        $sequence_term => $cds['residues'],
+        $description_term => 'This CDS was extracted from the reference sequence location at ' . $coords['schema:description'] . '.',
+        $seq_length_term => $cds['length'],
+        $seq_md5sum_term => md5($cds['residues']),
+        $type_term => 'CDS',
+      ];
+    }
+  }
+
+  /**
+   * Adds the sequence of each gene that the feature is a part of.
+   *
+   * When the gene record has residues of its own they are used directly.
+   * Otherwise the gene sequence is derived from each of the gene's reference
+   * locations.
+   *
+   * @param object $entity
+   *   The entity being loaded. Items are appended to the field.
+   * @param object $feature
+   *   The Chado feature record of the entity.
+   * @param string $field_name
+   *   The machine name of this field.
+   */
+  private function addGeneParent(&$entity, $feature, $field_name) {
+    $label_term = 'rdfs:label';
+    $type_term = 'rdfs:type';
+    $seq_coords_term = 'data:2012';
+    $description_term = 'schema:description';
+    $sequence_term = chado_get_semweb_term('feature', 'residues');
+    $seq_length_term = chado_get_semweb_term('feature', 'seqlen');
+    $seq_md5sum_term = chado_get_semweb_term('feature', 'md5checksum');
+
+    $sql = "
+      SELECT FO.*
+      FROM {feature_relationship} FREL
+        INNER JOIN {feature} FO on FO.feature_id = FREL.object_id
+        INNER JOIN {cvterm} CVT on CVT.cvterm_id = FO.type_id
+        INNER JOIN {cvterm} RCVT on RCVT.cvterm_id = FREL.type_id
+      WHERE
+        FREL.subject_id = :feature_id and
+        CVT.name = 'gene' and
+        RCVT.name IN ('part_of')
+    ";
+    $genes = chado_query($sql, [':feature_id' => $feature->feature_id]);
+    while ($gene = $genes->fetchObject()) {
+      if (!empty($gene->residues)) {
+        // The gene has its own residues. The description gets its own key so
+        // that it does not overwrite the label, and the type is the gene.
+        $entity->{$field_name}['und'][]['value'] = [
+          $label_term => 'Gene Sequence (primary)',
+          $sequence_term => $gene->residues,
+          $description_term => 'The gene sequence.',
+          $seq_length_term => strlen($gene->residues),
+          $seq_md5sum_term => md5($gene->residues),
+          $type_term => 'gene'
+        ];
+      }
+      else {
+        // No stored residues, so derive the sequence from the reference.
+        $seqs = chado_get_feature_sequences(['feature_id' => $gene->feature_id], ['derive_from_parent' => 1]);
+        foreach ($seqs as $seq) {
+          $featureloc = $this->getFeatureLoc($seq['featureloc_id']);
+          $coords = $this->getSequenceCoords($featureloc);
+          $entity->{$field_name}['und'][]['value'] = [
+            $label_term => 'Gene Sequence (' . number_format(strlen($seq['residues'])) . 'bp)',
+            $sequence_term => $seq['residues'],
+            $description_term => 'This gene sequence was extracted from the reference sequence location at ' . $coords['schema:description'] . '.' ,
+            $seq_coords_term => $coords,
+            $seq_length_term => strlen($seq['residues']),
+            $seq_md5sum_term => md5($seq['residues']),
+            $type_term => 'gene'
+          ];
+        }
+      }
+    }
+  }
+
+  /**
+   * Adds the sequence of each polypeptide related to the feature.
+   *
+   * Polypeptides are found through 'derives_from' and 'part_of' relationships
+   * in which the feature is the object. Those without residues are skipped.
+   *
+   * @param object $entity
+   *   The entity being loaded. One item is appended to the field for every
+   *   polypeptide that has residues.
+   * @param object $feature
+   *   The Chado feature record of the entity.
+   * @param string $field_name
+   *   The machine name of this field.
+   */
+  private function addProtein(&$entity, $feature, $field_name) {
+    $label_term = 'rdfs:label';
+    $type_term = 'rdfs:type';
+    $description_term = 'schema:description';
+    $sequence_term = chado_get_semweb_term('feature', 'residues');
+    $seq_length_term = chado_get_semweb_term('feature', 'seqlen');
+    $seq_md5sum_term = chado_get_semweb_term('feature', 'md5checksum');
+
+    $sql = "
+      SELECT F.*
+      FROM {feature_relationship} FR
+        INNER JOIN {feature} F on FR.subject_id = F.feature_id
+        INNER JOIN {cvterm} CVT on CVT.cvterm_id = F.type_id
+        INNER JOIN {cvterm} RCVT on RCVT.cvterm_id = FR.type_id
+      WHERE
+        FR.object_id = :feature_id and
+        CVT.name = 'polypeptide' and
+        RCVT.name IN ('derives_from', 'part_of')
+      ORDER BY FR.rank ASC
+    ";
+    $results = chado_query($sql, [':feature_id' => $feature->feature_id]);
+
+    while ($polypeptide = $results->fetchObject()) {
+      // A polypeptide without residues has nothing to show.
+      if (empty($polypeptide->residues)) {
+        continue;
+      }
+
+      $residues = $polypeptide->residues;
+      $length = strlen($residues);
+      $entity->{$field_name}['und'][]['value'] = [
+        $label_term => 'Protein Sequence (' . number_format($length) . 'aa)',
+        $sequence_term => $residues,
+        $description_term => 'The protein sequence.',
+        $seq_length_term => $length,
+        $seq_md5sum_term => md5($residues),
+        $type_term => 'polypeptide',
+      ];
+    }
+  }
+}
+

+ 63 - 0
tripal_chado/includes/TripalFields/data__sequence_record/data__sequence_record_formatter.inc

@@ -0,0 +1,63 @@
+<?php
+
+class data__sequence_record_formatter extends ChadoFieldFormatter {
+
+  // The default label for this field.
+  public static $default_label = 'Sequences';
+
+  // The list of field types for which this formatter is appropriate.
+  public static $field_types = ['data__sequence_record'];
+
+  /**
+   * Renders each sequence record as a titled block of wrapped residues.
+   *
+   * Every non-empty item becomes an 'item' render element whose title is the
+   * record's label, whose description is the record's description and whose
+   * markup is the sequence wrapped at a fixed width. When there is nothing to
+   * show, an empty markup element is set instead.
+   *
+   * @see TripalFieldFormatter::view()
+   */
+  public function view(&$element, $entity_type, $entity, $langcode, $items, $display) {
+
+    $label_term = 'rdfs:label';
+    $description_term = 'schema:description';
+    $sequence_term = chado_get_semweb_term('feature', 'residues');
+
+    // The number of residues shown on each line of the sequence.
+    $num_bases = 50;
+
+    $content = [];
+
+    foreach ($items as $delta => $item) {
+      if (empty($item['value'])) {
+        continue;
+      }
+      $record = $item['value'];
+
+      // A record is not guaranteed to carry every term, so fall back to an
+      // empty string rather than raising an undefined index notice.
+      $sequence = isset($record[$sequence_term]) ? $record[$sequence_term] : '';
+      $label = isset($record[$label_term]) ? $record[$label_term] : '';
+      $description = isset($record[$description_term]) ? $record[$description_term] : '';
+
+      $residues = '<pre class="residues-formatter">';
+      $residues .= wordwrap($sequence, $num_bases, "<br>", TRUE);
+      $residues .= '</pre>';
+
+      $content[] = [
+        '#type' => 'item',
+        '#title' => $label,
+        '#description' => $description,
+        '#markup' => $residues,
+      ];
+    }
+
+    if (empty($content)) {
+      $element[0] = [
+        '#type' => 'markup',
+        '#markup' => '',
+      ];
+      return;
+    }
+
+    $element[0] = $content;
+  }
+}

+ 26 - 0
tripal_chado/includes/TripalFields/data__sequence_record/data__sequence_record_widget.inc

@@ -0,0 +1,26 @@
+<?php
+
+/**
+ * The widget for the data__sequence_record field.
+ *
+ * Both the form() and validate() methods are currently empty, so this widget
+ * adds no form elements and performs no validation.
+ */
+class data__sequence_record_widget extends ChadoFieldWidget {
+
+  // The default label for this field.
+  public static $default_label = 'Sequences';
+
+  // The list of field types for which this widget is appropriate.
+  public static $field_types = ['data__sequence_record'];
+
+  /**
+   * Currently adds no elements to the widget form.
+   *
+   * @see TripalFieldWidget::form()
+   */
+  public function form(&$widget, &$form, &$form_state, $langcode, $items, $delta, $element) {
+
+  }
+
+  /**
+   * Currently performs no validation.
+   *
+   * @see TripalFieldWidget::validate()
+   */
+  public function validate($element, $form, &$form_state, $langcode, $delta) {
+
+  }
+}

+ 1 - 1
tripal_chado/includes/TripalFields/sbo__relationship/sbo__relationship.inc

@@ -524,7 +524,7 @@ class sbo__relationship extends ChadoField {
       // relationship type and the object and subject
       // relationship type and the object and subject
       'include_fk' => [
       'include_fk' => [
         'type_id' => 1,
         'type_id' => 1,
-        $object_id_key => [
+          $object_id_key => [
           'type_id' => 1,
           'type_id' => 1,
           'organism_id' => 1,
           'organism_id' => 1,
         ],
         ],

+ 25 - 15
tripal_chado/includes/TripalFields/so__cds/so__cds.inc

@@ -72,24 +72,24 @@ class so__cds extends ChadoField {
       'value' => '',
       'value' => '',
     ];
     ];
 
 
-    $options = [
-      'return_array' => TRUE,
-      'order_by' => ['rank' => 'ASC'],
-    ];
-    $feature = chado_expand_var($feature, 'table', 'featureloc', $options);
-    $featurelocs = $feature->featureloc->feature_id;
-
-    // Verify that we have featurelocs before entering the loop
-    if (!is_array($featurelocs)) {
-      return;
-    }
-
-    foreach ($featurelocs as $featureloc) {
+    // Get the featureloc records that this feature is aligned to. We use
+    // this SQL rather than the chado_expand_var function because we don't
+    // want the residues included from the srcfeature_id which may be huge
+    // and overrun memory.
+    $featurelocs_sql = "
+      SELECT FL.featureloc_id, FL.srcfeature_id
+      FROM {featureloc} FL
+      WHERE FL.feature_id = :feature_id
+      ORDER BY rank ASC
+    ";
+    $aligned = chado_query($featurelocs_sql, [':feature_id' => $feature->feature_id]);
+    $index = 0;
+    while ($featureloc = $aligned->fetchObject()) {
       // Generate a CDS sequence if one exsits for this feature alignment.
       // Generate a CDS sequence if one exsits for this feature alignment.
       $cds_sequence = chado_get_feature_sequences(
       $cds_sequence = chado_get_feature_sequences(
         [
         [
           'feature_id' => $feature->feature_id,
           'feature_id' => $feature->feature_id,
-          'parent_id' => $featureloc->srcfeature_id->feature_id,
+          'parent_id' => $featureloc->srcfeature_id,
           'name' => $feature->name,
           'name' => $feature->name,
           'featureloc_id' => $featureloc->featureloc_id,
           'featureloc_id' => $featureloc->featureloc_id,
         ],
         ],
@@ -111,7 +111,17 @@ class so__cds extends ChadoField {
         // that one mRNA is only aligned to a single location on the assembly so we
         // that one mRNA is only aligned to a single location on the assembly so we
         // can access the CDS sequence with index 0.
         // can access the CDS sequence with index 0.
         if ($cds_sequence[0]['residues']) {
         if ($cds_sequence[0]['residues']) {
-          $entity->{$field_name}['und'][$num_seqs++]['value'] = $cds_sequence[0]['residues'];
+          $entity->{$field_name}['und'][$num_seqs++] = [
+            'value' => $cds_sequence[0]['residues'],
+            // This field was incorrectly listed as a field in the featureprop
+            // table, but really it is a derived field. So, we have to do this
+            // hacky fix to get around the problem.
+            'chado-featureprop__featureprop_id' => NULL,
+            'chado-featureprop__feature_id' => NULL,
+            'chado-featureprop__value' => NULL,
+            'chado-featureprop__type_id' => NULL,
+            'chado-featureprop__rank' => NULL
+          ];
         }
         }
       }
       }
     }
     }

+ 3 - 4
tripal_chado/includes/TripalFields/so__cds/so__cds_widget.inc

@@ -15,16 +15,15 @@ class so__cds_widget extends ChadoFieldWidget {
   public function form(&$widget, &$form, &$form_state, $langcode, $items, $delta, $element) {
   public function form(&$widget, &$form, &$form_state, $langcode, $items, $delta, $element) {
     parent::form($widget, $form, $form_state, $langcode, $items, $delta, $element);
     parent::form($widget, $form, $form_state, $langcode, $items, $delta, $element);
 
 
-    // TODO: add a widget...
   }
   }
 
 
 
 
+
+
   /**
   /**
    *
    *
    * @see TripalFieldWidget::submit()
    * @see TripalFieldWidget::submit()
    */
    */
-  public function submit($form, &$form_state, $entity_type, $entity, $langcode, $delta) {
-    $field_name = $this->field['field_name'];
-
+  public function validate($element, $form, &$form_state, $langcode, $delta) {
   }
   }
 }
 }

+ 44 - 0
tripal_chado/includes/tripal_chado.fields.inc

@@ -380,6 +380,18 @@ function tripal_chado_bundle_fields_info_custom(&$info, $details, $entity_type,
         'type' => 'field_chado_storage',
         'type' => 'field_chado_storage',
       ],
       ],
     ];
     ];
+
+    $field_name = 'data__sequence_record';
+    $field_type = 'data__sequence_record';
+    $info[$field_name] = [
+      'field_name' => $field_name,
+      'type' => $field_type,
+      'cardinality' => FIELD_CARDINALITY_UNLIMITED,
+      'locked' => FALSE,
+      'storage' => [
+        'type' => 'field_chado_storage',
+      ],
+    ];
   }
   }
 
 
   // FEATURE SEQLEN
   // FEATURE SEQLEN
@@ -1703,6 +1715,38 @@ function tripal_chado_bundle_instances_info_custom(&$info, $entity_type, $bundle
         ],
         ],
       ],
       ],
     ];
     ];
+
+    $field_name = 'data__sequence_record';
+    $info[$field_name] = [
+      'field_name' => $field_name,
+      'entity_type' => $entity_type,
+      'bundle' => $bundle->name,
+      'label' => 'Sequences',
+      'description' => 'A molecular sequence and associated metadata.',
+      'required' => FALSE,
+      'settings' => [
+        'auto_attach' => FALSE,
+        'chado_table' => $table_name,
+        'chado_column' => 'residues',
+        'base_table' => $table_name,
+        'term_accession' => '0849',
+        'term_vocabulary' => 'data',
+        'term_name' => 'Sequence record',
+      ],
+      'widget' => [
+        'type' => 'data__sequence_record_widget',
+        'settings' => [
+          'display_label' => 1,
+        ],
+      ],
+      'display' => [
+        'default' => [
+          'label' => 'above',
+          'type' => 'data__sequence_record_formatter',
+          'settings' => [],
+        ],
+      ],
+    ];
   }
   }
 
 
   // FEATURE SEQLEN
   // FEATURE SEQLEN

+ 8 - 1
tripal_chado/includes/tripal_chado.semweb.inc

@@ -566,10 +566,17 @@ function tripal_chado_populate_vocab_EDAM() {
     'id' => 'data:2044',
     'id' => 'data:2044',
     'name' => 'Sequence',
     'name' => 'Sequence',
     'cv_name' => 'EDAM',
     'cv_name' => 'EDAM',
-    'definition' => 'One or more molecular sequences, possibly with associated annotation..',
+    'definition' => 'One or more molecular sequences, possibly with associated annotation.',
   ]);
   ]);
   chado_associate_semweb_term('feature', 'residues', $term);
   chado_associate_semweb_term('feature', 'residues', $term);
 
 
+  $term = chado_insert_cvterm([
+    'id' => 'data:0849',
+    'name' => 'Sequence record',
+    'cv_name' => 'EDAM',
+    'definition' => 'A molecular sequence and associated metadata.',
+  ]);
+
   $term = chado_insert_cvterm([
   $term = chado_insert_cvterm([
     'id' => 'data:0842',
     'id' => 'data:0842',
     'name' => 'Identifier',
     'name' => 'Identifier',

+ 1 - 1
tripal_chado/theme/css/tripal_chado.css

@@ -36,7 +36,7 @@
 
 
 .residues-formatter {
 .residues-formatter {
   color: black;
   color: black;
-  height: 100px;
+  height: 200px;
   max-width: 500px;
   max-width: 500px;
   overflow: scroll;
   overflow: scroll;
   white-space: normal;
   white-space: normal;

+ 18 - 0
tripal_chado/tripal_chado.install

@@ -2074,4 +2074,22 @@ function tripal_chado_update_7338() {
     throw new DrupalUpdateException('Could not perform update: '. $error);
     throw new DrupalUpdateException('Could not perform update: '. $error);
   }
   }
 
 
+}
+
+/**
+ * Adds the "Sequence record" cvterm for the data__sequence_record field.
+ *
+ * Inserts the EDAM term data:0849 ("Sequence record") through
+ * chado_insert_cvterm() and then posts a message announcing the new
+ * "Sequences" field to the person running the update.
+ *
+ * @throws DrupalUpdateException
+ *   Thrown when a PDOException is caught; the original message is appended
+ *   to the exception text.
+ */
+function tripal_chado_update_7339() {
+  try {
+    $term = chado_insert_cvterm([
+      'id' => 'data:0849',
+      'name' => 'Sequence record',
+      'cv_name' => 'EDAM',
+      'definition' => 'A molecular sequence and associated metadata.',
+    ]);
+    drupal_set_message("\n\nNOTE: This update includes a new \"Sequences\" field. It compiles both primary sequences and sequences extracted from the reference into a single list. For mRNA it provides gene, full length mRNA, CDS and protein sequences (if available). Please consider using this new field and disabling other sequence fields.\n\n");
+  } catch (\PDOException $e) {
+    $error = $e->getMessage();
+    throw new DrupalUpdateException('Could not perform update: '. $error);
+  }
 }
 }

+ 17 - 8
tripal_ds/includes/tripal_ds.ds.inc

@@ -72,28 +72,37 @@ function _ds_layout_settings_info($bundle_name, $instances) {
         $instance_base_table = array_key_exists('base_table', $instance['settings']) ? $instance['settings']['base_table'] : '';
         $instance_base_table = array_key_exists('base_table', $instance['settings']) ? $instance['settings']['base_table'] : '';
         $instance_base_chado = array_key_exists('chado_table', $instance['settings']) ? $instance['settings']['chado_table'] : '';
         $instance_base_chado = array_key_exists('chado_table', $instance['settings']) ? $instance['settings']['chado_table'] : '';
         $prop_table = strpos($instance_base_chado, 'prop');
         $prop_table = strpos($instance_base_chado, 'prop');
+        $data_sequence_record = strpos($instance_name, 'data__sequence_record');
         $data_sequence = strpos($instance_name, 'data__sequence');
         $data_sequence = strpos($instance_name, 'data__sequence');
         if ($instance_base_chado && $instance_base_table) {
         if ($instance_base_chado && $instance_base_table) {
 
 
           if ($instance_base_chado == $instance_base_table) {
           if ($instance_base_chado == $instance_base_table) {
-            if ($prop_table !== FALSE) {
-              array_push($prop_fields, $instance_name);
-            }
-            elseif ($data_sequence !== FALSE) {
+            if ($data_sequence_record !== FALSE) {
               array_push($data_sequence_fields, $instance_name);
               array_push($data_sequence_fields, $instance_name);
             }
             }
+            elseif ($data_sequence !== FALSE or $instance_name == 'so__cds' or $instance_name == 'data__protein_sequence') {
+              // Do nothing as these chado column sequence fields
+              // should be hidden by default.
+            }
+            elseif ($prop_table !== FALSE) {
+              array_push($prop_fields, $instance_name);
+            }
             else {
             else {
               array_push($summary_fields, $instance_name);
               array_push($summary_fields, $instance_name);
             }
             }
 
 
           }
           }
           elseif ($instance_base_chado != $instance_base_table) {
           elseif ($instance_base_chado != $instance_base_table) {
-            if ($prop_table !== FALSE) {
-              array_push($prop_fields, $instance_name);
-            }
-            elseif ($data_sequence !== FALSE) {
+            if ($data_sequence_record !== FALSE) {
               array_push($data_sequence_fields, $instance_name);
               array_push($data_sequence_fields, $instance_name);
             }
             }
+            elseif ($data_sequence !== FALSE or $instance_name == 'so__cds' or $instance_name == 'data__protein_sequence') {
+              // Do nothing as these chado column sequence fields
+              // should be hidden by default.
+            }
+            elseif ($prop_table !== FALSE) {
+              array_push($prop_fields, $instance_name);
+            }
             else {
             else {
               array_push($all_other_fields, $instance);
               array_push($all_other_fields, $instance);
 
 

+ 1 - 0
tripal_ws/includes/TripalWebService.inc

@@ -635,6 +635,7 @@ class TripalWebService {
 
 
     if (!$term) {
     if (!$term) {
       $backtrace = debug_backtrace();
       $backtrace = debug_backtrace();
+      print_r($backtrace[1]['args']);
       throw new Exception('addContextTerm: Please provide a non NUll or non empty $term.');
       throw new Exception('addContextTerm: Please provide a non NUll or non empty $term.');
 
 
     }
     }