Browse Source

Merge branch '7.x-3.x' into 100-tv3-gene_pages

Stephen Ficklin 4 years ago
parent
commit
4ba9fd11bb
22 changed files with 465 additions and 322 deletions
  1. 11 17
      docs/user_guide/example_genomics/func_annots/setup.rst
  2. BIN
      docs/user_guide/example_genomics/func_annots/setup2.png
  3. BIN
      docs/user_guide/example_genomics/genomes_genes.1.png
  4. 6 2
      docs/user_guide/example_genomics/genomes_genes.rst
  5. 8 9
      docs/user_guide/example_genomics/pub_import.rst
  6. 1 1
      tripal/includes/tripal.importer.inc
  7. 8 8
      tripal_chado/api/modules/tripal_chado.feature.api.inc
  8. 61 20
      tripal_chado/api/modules/tripal_chado.pub.api.inc
  9. 6 0
      tripal_chado/includes/TripalFields/chado_linker__prop/chado_linker__prop_widget.inc
  10. 45 42
      tripal_chado/includes/TripalFields/data__sequence_record/data__sequence_record.inc
  11. 6 12
      tripal_chado/includes/TripalFields/data__sequence_record/data__sequence_record_formatter.inc
  12. 36 26
      tripal_chado/includes/TripalFields/sbo__database_cross_reference/sbo__database_cross_reference.inc
  13. 76 15
      tripal_chado/includes/TripalFields/sbo__database_cross_reference/sbo__database_cross_reference_formatter.inc
  14. 8 5
      tripal_chado/includes/TripalFields/sbo__relationship/sbo__relationship.inc
  15. 21 13
      tripal_chado/includes/TripalFields/sbo__relationship/sbo__relationship_formatter.inc
  16. 44 35
      tripal_chado/includes/TripalImporter/GFF3Importer.inc
  17. 10 0
      tripal_chado/includes/tripal_chado.field_storage.inc
  18. 11 0
      tripal_chado/includes/tripal_chado.semweb.inc
  19. 21 2
      tripal_chado/tripal_chado.install
  20. 1 1
      tripal_ds/api/tripal_ds.pane.api.inc
  21. 80 109
      tripal_ds/includes/tripal_ds.ds.inc
  22. 5 5
      tripal_ds/includes/tripal_ds.field_group.inc

+ 11 - 17
docs/user_guide/example_genomics/func_annots/setup.rst

@@ -3,41 +3,37 @@ Module Setup
 .. note::
 
   Remember you must set the ``$DRUPAL_HOME`` environment variable if you want to cut-and-paste the commands below. See :doc:`../../install_tripal/drupal_home`
-  
-  
-For this example we will be load functional data for our gene. To do this we will use the Blast, KEGG, and InterPro extension modules. However, these extension modules are not part of the "core" Tripal package but are available as separate extensions.  Anyone may create extensions for Tripal.  These extensions are useful for genomic data and therefore are included in this tutorial. 
+
+
+For this example we will load functional data for our gene. To do this we will use the Blast, KEGG, and InterPro extension modules. However, these extension modules are not part of the "core" Tripal package but are available as separate extensions.  Anyone may create extensions for Tripal.  These extensions are useful for genomic data and therefore are included in this tutorial.
 
 To download these modules:
 
   ::
-  
-    cd $DRUPAL_HOME    
+
+    cd $DRUPAL_HOME
     drush pm-download tripal_analysis_blast
-    drush pm-download tripal_analysis_kegg
     drush pm-download tripal_analysis_interpro
 
 Now, enable these extension modules:
 
   ::
-  
+
     drush pm-enable tripal_analysis_blast
     drush pm-enable tripal_analysis_interpro
-    drush pm-enable tripal_analysis_kegg
 
 For this example, we will use the following files which are available for downloading:
 
 - `Citrus_sinensis-orange1.1g015632m.g.iprscan.xml <http://www.gmod.org/mediawiki/images/0/0c/Citrus_sinensis-orange1.1g015632m.g.iprscan.xml>`_
-- `Citrus_sinensis-orange1.1g015632m.g.KEGG.heir.tar.gz <http://www.gmod.org/mediawiki/images/1/13/Citrus_sinensis-orange1.1g015632m.g.KEGG.heir.tar.gz>`_
 - `Blastx_citrus_sinensis-orange1.1g015632m.g.fasta.0_vs_uniprot_sprot.fasta.out <http://www.gmod.org/mediawiki/images/e/e8/Blastx_citrus_sinensis-orange1.1g015632m.g.fasta.0_vs_uniprot_sprot.fasta.out>`_
 - `Blastx_citrus_sinensis-orange1.1g015632m.g.fasta.0_vs_nr.out <http://www.gmod.org/mediawiki/images/2/24/Blastx_citrus_sinensis-orange1.1g015632m.g.fasta.0_vs_nr.out>`_
 
 Download these files to the ```$DRUPAL_HOME/sites/default/files``` directory. To do so quickly run these commands:
 
   ::
-  
+
     cd $DRUPAL_HOME/sites/default/files
     wget http://www.gmod.org/mediawiki/images/0/0c/Citrus_sinensis-orange1.1g015632m.g.iprscan.xml
-    wget http://www.gmod.org/mediawiki/images/1/13/Citrus_sinensis-orange1.1g015632m.g.KEGG.heir.tar.gz
     wget http://www.gmod.org/mediawiki/images/e/e8/Blastx_citrus_sinensis-orange1.1g015632m.g.fasta.0_vs_uniprot_sprot.fasta.out
     wget http://www.gmod.org/mediawiki/images/2/24/Blastx_citrus_sinensis-orange1.1g015632m.g.fasta.0_vs_nr.out
 
@@ -45,23 +41,22 @@ Each of these modules provides new fields for both the **gene** and **mRNA** con
 
 .. image:: setup1.png
 
-Next, we need to position the new field. Using the skills you learned in the :doc:`../../content_types/configuring_page_display` Create three new **Tripal Panes** named:
+Next, we need to position the new field. Using the skills you learned in the :doc:`../../content_types/configuring_page_display` Create two new **Tripal Panes** named:
 
 - Blast Results
 - Protein Domains
-- KEGG Pathways
 
 Be sure to:
 
-- Place the three new fields into each pane respectively 
-- Move the Panes out of the **disabled** section. 
+- Place the new fields into each pane respectively
+- Move the Panes out of the **disabled** section.
 - Set the label for each field to **Hidden**.
 
 The following shows an example of this layout:
 
 .. image:: setup2.png
 
-The fields are now ready for display once data is added!  
+The fields are now ready for display once data is added!
 
 .. note::
 
@@ -70,4 +65,3 @@ The fields are now ready for display once data is added!
 .. note::
 
   Anytime you install a Tripal v3 extension module you should check for new fields, and then place those fields in the layout.  Extension modules often will not do this for you because they do not assume you want these new fields.
-  

BIN
docs/user_guide/example_genomics/func_annots/setup2.png


BIN
docs/user_guide/example_genomics/genomes_genes.1.png


+ 6 - 2
docs/user_guide/example_genomics/genomes_genes.rst

@@ -26,9 +26,13 @@ Enter the following:
 
   "File", "Upload the file name Citrus_sinensis-orange1.1g015632m.g.gff3"
   "Analysis", "Whole Genome Assembly and Annotation of Citrus sinensis"
-  "Organism", "Citrus sinensis"
+  "Existing Organism", "Citrus sinensis"
+  "Landmark Type", "supercontig"
   "All other options", "leave as default"
 
+.. note::
+    The Landmark Type is provided for this demo GFF3 file because the chromosome is not defined in the file, only the genomic features on the chromosomes.  The landmark type is not needed if the GFF3 file has the chromosomes (scaffolds or contigs) defined in the GFF3 file.
+
 Finally, click the Import GFF3 file button. You'll notice a job was submitted to the jobs subsystem. Now, to complete the process we need the job to run. We'll do this manually:
 
 ::
@@ -78,7 +82,7 @@ You should see output similar to the following:
     Step 22 of 26: Insert feature ontology terms...
     Step 23 of 26: Insert 'derives_from' relationships...
     Step 24 of 26: Insert Targets...
-    Step 25 of 26: Associate features with analysis....              
+    Step 25 of 26: Associate features with analysis....
     Step 26 of 26: Adding sequences data (Skipped: none available)...
 
     Done.

+ 8 - 9
docs/user_guide/example_genomics/pub_import.rst

@@ -3,7 +3,7 @@ Importing Publications
 .. note::
 
   Remember you must set the ``$DRUPAL_HOME`` environment variable if you want to cut-and-paste the commands below. See :doc:`../install_tripal/drupal_home`
-  
+
 Tripal provides an interface for automatically and manually adding publications.
 
 Manually Adding a Publication
@@ -64,7 +64,7 @@ Now, save this importer. You should see that we have one importer in the list:
 
 .. image:: pub_import.4.png
 
-We can use this importer to load all  publications related to Citrus sinensis from PubMed into our database (how to load these will be shown later). However, what if new publications are added? We would like this importer to be run monthly so that we can automatically add new publications as they become available. But we do not need to try to reload these 760 every time the loader runs each month. We will create a new importer that only finds publications within the last 30 days. To do this, click the link New Importer. Now, add the following criteria:
+We can use this importer to load all publications related to *Citrus sinensis* from PubMed into our database (how to load these will be shown later). However, what if new publications are added? We would like this importer to be run monthly so that we can automatically add new publications as they become available. But we do not need to try to reload the same publications every time the loader runs each month. We will create a new importer that only finds publications within the last 30 days. To do this, click the link New Importer. Now, add the following criteria:
 
 .. csv-table::
   :header: "Field Name", "Value"
@@ -142,14 +142,14 @@ Import from the USDA National Agricultural Library
 The instructions for the Tripal publication importer described previously use the NCBI PubMed database. However, you can also import publications from the USDA National Agricultural Library (AGRICOLA). However, to use this repository a few software dependencies are required.  These include:
 
 - The `YAZ library <https://www.indexdata.com/resources/software/yaz/>`_
-- `PHP support for YAZ <https://www.php.net/manual/en/book.yaz.php>`_  
+- `PHP support for YAZ <https://www.php.net/manual/en/book.yaz.php>`_
 
-The following instructions are to install the necessary dependencies on an Ubuntu 18.04 LTS.   
+The following instructions are to install the necessary dependencies on an Ubuntu 18.04 LTS.
 
 First install yaz, the yaz development library and the php development library:
 
 .. code-block:: bash
- 
+
   sudo apt-get install yaz libyaz5-dev php-dev
 
 
@@ -157,10 +157,10 @@ Next update the PECL tool and install the PHP yaz library:
 
 
 .. code-block:: bash
-  
+
   sudo pecl channel-update pecl.php.net
   sudo pecl install yaz
-  
+
 Next, edit the `php.ini` files.  On Ubuntu 18.04 there are two PHP files:
 
 - `/etc/php/7.2/cli/php.ini`
@@ -175,8 +175,7 @@ Add the following line to each file:
 Finally, restart the web server so that it picks up the changes to the `php.ini` file.
 
 .. code-block:: bash
-  
+
   sudo service apache2 restart
 
 You can now import publications from Agricola using the same interface as with PubMed.
-

+ 1 - 1
tripal/includes/tripal.importer.inc

@@ -80,7 +80,7 @@ function tripal_get_importer_form($form, &$form_state, $class) {
     $analyses = [];
     $analyses[''] = '';
     while ($analysis = $org_rset->fetchObject()) {
-      $analyses[$analysis->analysis_id] = "$analysis->name ($analysis->program $analysis->programversion, $analysis->sourcename)";
+      $analyses[$analysis->analysis_id] = $analysis->name;
     }
     $form['analysis_id'] = [
       '#title' => t('Analysis'),

+ 8 - 8
tripal_chado/api/modules/tripal_chado.feature.api.inc

@@ -785,21 +785,21 @@ function chado_get_fasta_defline($feature, $notes = '', $featureloc = NULL, $typ
 
   // Construct the definition line.
   $defline = $feature->uniquename . " " .
-    'ID=' . $feature->uniquename . "|" .
-    'Name=' . $feature->name . "|" .
-    'organism=' . $feature->organism_id->genus . " " . $feature->organism_id->species . "|" .
-    'type=' . $type . '|';
+    'ID=' . $feature->uniquename . "; " .
+    'Name=' . $feature->name . "; " .
+    'organism=' . $feature->organism_id->genus . " " . $feature->organism_id->species . "; " .
+    'type=' . $type . '; ';
   if ($length > 0) {
-    $defline .= "length=" . $length . "bp|";
+    $defline .= "length=" . $length . "bp; ";
   }
   if ($featureloc) {
-    $defline .= "location=Sequence derived from alignment at " . chado_get_location_string($featureloc);
+    $defline .= "location=Sequence derived from: " . chado_get_location_string($featureloc);
     $defline .= " (" . $featureloc->srcfeature_id->organism_id->genus . " " . $featureloc->srcfeature_id->organism_id->species . ")|";
   }
   if ($notes) {
-    $defline .= "Notes=$notes|";
+    $defline .= "Notes=$notes; ";
   }
-  $defline = substr($defline, 0, -1); // remove the trailing |
+  $defline = substr($defline, 0, -2); // remove the trailing "; "
   return $defline;
 }
 

+ 61 - 20
tripal_chado/api/modules/tripal_chado.pub.api.inc

@@ -105,7 +105,7 @@ function chado_get_publication($identifiers, $options = []) {
     }
     else {
       tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
-        "chado_get_publication: The dbxref identifier is not correctly formatted.",
+        "chado_get_publication: The dbxref identifier is not correctly formatted. Identifiers passed: %identifier.",
         ['%identifier' => print_r($identifiers, TRUE)]
       );
     }
@@ -1169,6 +1169,47 @@ function chado_pub_create_citation($pub) {
   return $citation;
 }
 
+/**
+ * Retrieves an array with all database cross references
+ *
+ * Implemented as SQL for performance reasons because chado_expand_var
+ * can take too long as it loads more information than needed
+ *
+ * @param $pub_id
+ *   A pub_id from the 'chado.pub' table
+ *
+ * @return
+ *   An array of records with the following keys:  'accession', 'version',
+ *   'description', 'name', 'url', 'urlprefix'.
+ *   These are the column names from the 'dbxref' and 'db' tables
+ *
+ * @ingroup tripal_pub_api
+ */
+function chado_get_pub_dbxrefs($pub_id) {
+  $fkey = 'pub_id';  // Should this be looked up in the schema?
+  $options = ['return_array' => 1];
+  $sql = "SELECT REF.accession, REF.version, REF.description, DB.name, DB.url, DB.urlprefix "
+       . "FROM {pub_dbxref} LINK "
+       . "INNER JOIN {dbxref} REF on LINK.dbxref_id = REF.dbxref_id "
+       . "INNER JOIN {db} DB on REF.db_id = DB.db_id "
+       . "WHERE LINK.$fkey = :pub_id";
+  $args = [':pub_id' => $pub_id];
+  $records = chado_query($sql, $args);
+
+  $results = [];
+  $delta = 0;
+  while($record = $records->fetchObject()) {
+    $results[$delta]['accession'] = $record->accession;
+    $results[$delta]['version'] = $record->version;
+    $results[$delta]['description'] = $record->description;
+    $results[$delta]['name'] = $record->name;
+    $results[$delta]['url'] = $record->url;
+    $results[$delta]['urlprefix'] = $record->urlprefix;
+    $delta++;
+  }
+  return $results;
+}
+
 /**
  * Retrieves the minimal information to uniquely describe any publication.
  *
@@ -1238,17 +1279,24 @@ function chado_get_minimal_pub_info($pub) {
     }
   }
 
+  // Load all database cross references.
+  $pub_dbxrefs = chado_get_pub_dbxrefs($pub->pub_id);
+
   // Get the first database cross-reference with a url.
-  $options = ['return_array' => 1];
-  $pub = chado_expand_var($pub, 'table', 'pub_dbxref', $options);
-  $dbxref = NULL;
-  if ($pub->pub_dbxref) {
-    foreach ($pub->pub_dbxref as $index => $pub_dbxref) {
-      if ($pub_dbxref->dbxref_id->db_id->urlprefix) {
-        $dbxref = $pub_dbxref->dbxref_id;
-      }
-    }
-  }
+// it is not clear what this was doing, it would have retrieved the last not the first
+// dbxref with a url, but the variable $dbxref is not referenced later. It could have
+// added information to $pub, but that does not appear to be referenced later.
+// chado_expand_var() can sometimes take a long time to execute, so just remove it?
+//  $options = ['return_array' => 1];
+//  $pub = chado_expand_var($pub, 'table', 'pub_dbxref', $options);
+//  $dbxref = NULL;
+//  if ($pub->pub_dbxref) {
+//    foreach ($pub->pub_dbxref as $index => $pub_dbxref) {
+//      if ($pub_dbxref->dbxref_id->db_id->urlprefix) {
+//        $dbxref = $pub_dbxref->dbxref_id;
+//      }
+//    }
+//  }
 
   // Get the URL.
   $values = [
@@ -1270,17 +1318,10 @@ function chado_get_minimal_pub_info($pub) {
     }
   }
 
-  // Get the list of database cross references.
-  $values = [
-    'pub_id' => $pub->pub_id,
-  ];
-  $options = [
-    'return_array' => 1,
-  ];
-  $pub_dbxrefs = chado_generate_var('pub_dbxref', $values, $options);
+  // Generate a list of database cross references formatted as "DB:accession".
   $dbxrefs = [];
   foreach ($pub_dbxrefs as $pub_dbxref) {
-    $dbxrefs[] = $pub_dbxref->dbxref_id->db_id->name . ':' . $pub_dbxref->dbxref_id->accession;
+    $dbxrefs[] = $pub_dbxref['name'] . ':' . $pub_dbxref['accession'];
   }
 
   // Get the citation.

+ 6 - 0
tripal_chado/includes/TripalFields/chado_linker__prop/chado_linker__prop_widget.inc

@@ -143,6 +143,12 @@ class chado_linker__prop_widget extends ChadoFieldWidget {
     $value = $form_state['values'][$field_name]['und'][$delta]['chado-' . $field_table . '__value'];
     $form_state['values'][$field_name]['und'][$delta]['value'] = $value;
 
+    // A value of zero gets set to empty when submitted and some prop
+    // tables don't have a default value set (e.g. pubprop).
+    if (!$form_state['values'][$field_name]['und'][$delta]['chado-' . $field_table . '__rank']) {
+      $form_state['values'][$field_name]['und'][$delta]['chado-' . $field_table . '__rank'] = 0;
+    }
+
     // If the user removed the property then we want to clear out the other
     // fields except the pkey value. If the pkey field is present and the value
     // is present then the chado storage backend will delete the record.

+ 45 - 42
tripal_chado/includes/TripalFields/data__sequence_record/data__sequence_record.inc

@@ -75,6 +75,9 @@ class data__sequence_record extends ChadoField {
     $field_name = $this->field['field_name'];
     $feature = $entity->chado_record;
 
+    // Initialize the field items array
+    $entity->{$field_name}['und'] = [];
+
     // Add the primary sequence from the Chado feature table, residues column.
     $feature = chado_expand_var($feature, 'field', 'feature.residues');
 
@@ -84,8 +87,8 @@ class data__sequence_record extends ChadoField {
     // If this is an mRNA feature then add the gene parent, full length
     // mRNA, CDS and protein.
     if ($feature->type_id->name == 'mRNA') {
-      $this->addGeneParent($entity, $feature, $field_name);
       $featurelocs = $this->addFLmRNA($entity, $feature, $field_name);
+      $this->addGeneParent($entity, $feature, $field_name);
       if (count($featurelocs) > 0) {
         $this->addCDS($entity, $feature, $field_name, $featurelocs);
         $this->addProtein($entity, $feature, $field_name);
@@ -95,6 +98,12 @@ class data__sequence_record extends ChadoField {
     else {
       $this->addGenericReference($entity, $feature, $field_name);
     }
+
+    // The field should always return a 'value' key so even if we don't
+    // have a sequence we still need to add it
+    if (count(array_keys($entity->{$field_name}['und'])) == 0) {
+      $entity->{$field_name}['und'][0]['value'] = '';
+    }
   }
 
   /**
@@ -111,6 +120,7 @@ class data__sequence_record extends ChadoField {
     $seq_coords_term = 'data:2012';
     $seq_length_term = chado_get_semweb_term('feature', 'seqlen');
     $seq_md5sum_term = chado_get_semweb_term('feature', 'md5checksum');
+    $fasta_defline = 'local:fasta_definition';
 
     $options = [
       'derive_from_parent' => 1,
@@ -123,12 +133,13 @@ class data__sequence_record extends ChadoField {
 
       $entity->{$field_name}['und'][]['value'] = [
         $sequence_term => $seq['residues'],
-        $label_term =>  ucfirst(preg_replace('/_/', ' ', $feature->type_id->name)) . ' Sequence (' . number_format(strlen($seq['residues'])) . 'bp)',
-        $description_term => 'This sequence was extracted from the reference sequence location at ' . $coords['schema:description'] . '.',
+        $label_term =>  'Derived ' . ucfirst(preg_replace('/_/', ' ', $feature->type_id->name)) . ' Sequence (' . number_format(strlen($seq['residues'])) . 'bp)',
+        $description_term => 'This sequence was derived by extracting bases from the reference sequence location at ' . $coords['schema:description'] . '.',
         $seq_coords_term => $coords,
         $seq_length_term => strlen($seq['residues']),
         $seq_md5sum_term => md5($seq['residues']),
-        $type_term => $feature->type_id->name
+        $type_term => $feature->type_id->name,
+        $fasta_defline => $seq['defline'],
       ];
 
       $featurelocs[] = $featureloc;
@@ -137,10 +148,7 @@ class data__sequence_record extends ChadoField {
   }
 
   /**
-   *
-   * @param unknown $entity
-   * @param unknown $feature
-   * @param unknown $field_name
+   * Adds the primary sequence from the feature.residues column.
    */
   private function addPrimary(&$entity, $feature, $field_name) {
 
@@ -150,25 +158,24 @@ class data__sequence_record extends ChadoField {
     $sequence_term = chado_get_semweb_term('feature', 'residues');
     $seq_length_term = chado_get_semweb_term('feature', 'seqlen');
     $seq_md5sum_term = chado_get_semweb_term('feature', 'md5checksum');
+    $fasta_defline = 'local:fasta_definition';
 
     if ($feature->residues) {
       $entity->{$field_name}['und'][]['value'] = [
-        $label_term => 'Primary ' . preg_replace('/_/', ' ', $feature->type_id->name) . 'Sequence (' . number_format($feature->seqlen) . 'bp)',
+        $label_term => 'Primary ' . preg_replace('/_/', ' ', $feature->type_id->name) . ' Sequence (' . number_format($feature->seqlen) . 'bp)',
         $description_term => 'This is the primary representative sequence for this feature.',
         $sequence_term => $feature->residues,
         $seq_length_term => $feature->seqlen,
         $seq_md5sum_term => $feature->md5checksum,
-        $type_term => $feature->type_id->name
+        $type_term => $feature->type_id->name,
+        $fasta_defline => chado_get_fasta_defline($feature)
       ];
     }
   }
 
 
   /**
-   *
-   * @param unknown $entity
-   * @param unknown $feature
-   * @param unknown $field_name
+   * Adds the full length mRNA sequence (only for mRNA features).
    */
   private function addFLmRNA(&$entity, $feature, $field_name) {
     $label_term = 'rdfs:label';
@@ -178,6 +185,7 @@ class data__sequence_record extends ChadoField {
     $seq_coords_term = 'data:2012';
     $seq_length_term = chado_get_semweb_term('feature', 'seqlen');
     $seq_md5sum_term = chado_get_semweb_term('feature', 'md5checksum');
+    $fasta_defline = 'local:fasta_definition';
 
     // Sometimes an mRNA may have only exons, only CDS or both exons and
     // CDS.  We need to know which.
@@ -212,12 +220,13 @@ class data__sequence_record extends ChadoField {
 
       $entity->{$field_name}['und'][]['value'] = [
         $sequence_term => $seq['residues'],
-        $label_term => 'Full Length mRNA Sequence (' . number_format(strlen($seq['residues'])) . 'bp)',
-        $description_term => 'This full length mRNA sequence was extracted from the reference sequence location at ' . $coords['schema:description'] . ' and contains: ' . implode(', ', $types) ,
+        $label_term => 'Derived mRNA Sequence (' . number_format(strlen($seq['residues'])) . 'bp)',
+        $description_term => 'This full length mRNA sequence was derived by extracting bases from the reference sequence location at ' . $coords['schema:description'] . ' and contains: ' . implode(', ', $types) ,
         $seq_coords_term => $coords,
         $seq_length_term => strlen($seq['residues']),
         $seq_md5sum_term => md5($seq['residues']),
-        $type_term => 'mRNA'
+        $type_term => 'mRNA',
+        $fasta_defline => $seq['defline']
       ];
 
       $featurelocs[] = $featureloc;
@@ -226,9 +235,7 @@ class data__sequence_record extends ChadoField {
   }
 
   /**
-   *
-   * @param unknown $featureloc_id
-   * @return unknown
+   * Retrieves the feature location information.
    */
   private function getFeatureLoc($featureloc_id) {
     $featurelocs_sql = "
@@ -241,9 +248,7 @@ class data__sequence_record extends ChadoField {
   }
 
   /**
-   *
-   * @param unknown $featureloc
-   * @return string[]|number[]|NULL[]|unknown[]
+   * Gets the sequence location string for a featureloc record.
    */
   private function getSequenceCoords($featureloc) {
     $description_term = 'schema:description';
@@ -277,10 +282,7 @@ class data__sequence_record extends ChadoField {
   }
 
   /**
-   *
-   * @param unknown $entity
-   * @param unknown $feature
-   * @param unknown $field_name
+   * Adds the CDS sequence (only for an mRNA feature)
    */
   private function addCDS(&$entity, $feature, $field_name, $featurelocs) {
     $label_term = 'rdfs:label';
@@ -289,6 +291,7 @@ class data__sequence_record extends ChadoField {
     $sequence_term = chado_get_semweb_term('feature', 'residues');
     $seq_length_term = chado_get_semweb_term('feature', 'seqlen');
     $seq_md5sum_term = chado_get_semweb_term('feature', 'md5checksum');
+    $fasta_defline = 'local:fasta_definition';
 
     foreach ($featurelocs as $featureloc) {
       $cds_feature = [
@@ -311,20 +314,18 @@ class data__sequence_record extends ChadoField {
         $entity->{$field_name}['und'][]['value'] = [
           $label_term => 'Coding Sequence (' . number_format($cds_sequence[0]['length']) . 'bp)',
           $sequence_term => $cds_sequence[0]['residues'],
-          $description_term => 'This CDS was extracted from the reference sequence location at ' . $coords['schema:description'] . '.' ,
+          $description_term => 'This CDS was derived by extracting bases from the reference sequence location at ' . $coords['schema:description'] . '.' ,
           $seq_length_term => $cds_sequence[0]['length'],
           $seq_md5sum_term => md5($cds_sequence[0]['residues']),
-          $type_term => 'CDS'
+          $type_term => 'CDS',
+          $fasta_defline => $cds_sequence[0]['defline']
         ];
       }
     }
   }
 
   /**
-   *
-   * @param unknown $entity
-   * @param unknown $feature
-   * @param unknown $field_name
+   * Adds the sequence for a gene parent (only for gene children).
    */
   private function addGeneParent(&$entity, $feature, $field_name) {
     $label_term = 'rdfs:label';
@@ -334,6 +335,7 @@ class data__sequence_record extends ChadoField {
     $sequence_term = chado_get_semweb_term('feature', 'residues');
     $seq_length_term = chado_get_semweb_term('feature', 'seqlen');
     $seq_md5sum_term = chado_get_semweb_term('feature', 'md5checksum');
+    $fasta_defline = 'local:fasta_definition';
 
     $sql = "
       SELECT FO.*
@@ -355,7 +357,8 @@ class data__sequence_record extends ChadoField {
           $label_term => 'The gene sequence.',
           $seq_length_term => strlen($gene->residues),
           $seq_md5sum_term => md5($gene->residues),
-          $type_term => 'polypeptide'
+          $type_term => 'gene',
+          $fasta_defline => chado_get_fasta_defline($gene),
         ];
       }
       else {
@@ -364,13 +367,14 @@ class data__sequence_record extends ChadoField {
           $featureloc = $this->getFeatureLoc($seq['featureloc_id']);
           $coords = $this->getSequenceCoords($featureloc);
           $entity->{$field_name}['und'][]['value'] = [
-            $label_term => 'Gene Sequence (' . number_format(strlen($seq['residues'])) . 'bp)',
+            $label_term => 'Derived Gene Sequence (' . number_format(strlen($seq['residues'])) . 'bp)',
             $sequence_term => $seq['residues'],
-            $description_term => 'This gene sequence was extracted from the reference sequence location at ' . $coords['schema:description'] . '.' ,
+            $description_term => 'This gene sequence was derived by extracting bases from the reference sequence location at ' . $coords['schema:description'] . '.' ,
             $seq_coords_term => $coords,
             $seq_length_term => strlen($seq['residues']),
             $seq_md5sum_term => md5($seq['residues']),
-            $type_term => 'gene'
+            $type_term => 'gene',
+            $fasta_defline => $seq['defline'],
           ];
         }
       }
@@ -378,10 +382,7 @@ class data__sequence_record extends ChadoField {
   }
 
   /**
-   *
-   * @param unknown $entity
-   * @param unknown $feature
-   * @param unknown $field_name
+   * Adds the protein sequence (only for mRNA features).
    */
   private function addProtein(&$entity, $feature, $field_name) {
     $label_term = 'rdfs:label';
@@ -390,6 +391,7 @@ class data__sequence_record extends ChadoField {
     $sequence_term = chado_get_semweb_term('feature', 'residues');
     $seq_length_term = chado_get_semweb_term('feature', 'seqlen');
     $seq_md5sum_term = chado_get_semweb_term('feature', 'md5checksum');
+    $fasta_defline = 'local:fasta_definition';
 
     $sql = "
       SELECT F.*
@@ -412,7 +414,8 @@ class data__sequence_record extends ChadoField {
           $description_term => 'The protein sequence.',
           $seq_length_term => strlen($protein->residues),
           $seq_md5sum_term => md5($protein->residues),
-          $type_term => 'polypeptide'
+          $type_term => 'polypeptide',
+          $fasta_defline => chado_get_fasta_defline($protein),
         ];
       }
     }

+ 6 - 12
tripal_chado/includes/TripalFields/data__sequence_record/data__sequence_record_formatter.inc

@@ -26,38 +26,32 @@ class data__sequence_record_formatter extends ChadoFieldFormatter {
     $fmax_term = chado_get_semweb_term('featureloc', 'fmax');
     $strand_term = chado_get_semweb_term('featureloc', 'strand');
     $phase_term = chado_get_semweb_term('featureloc', 'phase');
-
-    $content = [];
+    $fasta_defline = 'local:fasta_definition';
 
     foreach ($items as $delta => $item) {
-      if (empty($item['value'])) {
+      if (!$item['value']) {
         continue;
       }
 
       $num_bases = 50;
       $residues = '<pre class="residues-formatter">';
+      $residues .= ">" . $item['value'][$fasta_defline] . "<br>";
       $residues .= wordwrap($item['value'][$sequence_term], $num_bases, "<br>", TRUE);
       $residues .= '</pre>';
 
-      $content[] = [
+      $element[$delta] = [
         '#type' => 'item',
         '#title' => $item['value'][$label_term],
         '#description' => $item['value'][$description_term],
         '#markup' => $residues,
       ];
-
-
     }
 
-
-    if (empty($content)) {
+    if (count($element) == 0) {
       $element[0] = [
         '#type' => 'markup',
-        '#markup' => '',
+        '#markup' => 'There are no sequences for this feature.',
       ];
-      return;
     }
-
-    $element[0] = $content;
   }
 }

+ 36 - 26
tripal_chado/includes/TripalFields/sbo__database_cross_reference/sbo__database_cross_reference.inc

@@ -37,6 +37,10 @@ class sbo__database_cross_reference extends ChadoField {
     // type. This will create form elements when editing the field instance
     // to allow the site admin to change the term settings above.
     'term_fixed' => FALSE,
+    // The number of items to show on a page.
+    'items_per_page' => 10,
+    // Limit to the number of items to show in cases of large number of cross references.
+    'max_items' => 10000,
   ];
 
   // The default widget for this field.
@@ -138,33 +142,39 @@ class sbo__database_cross_reference extends ChadoField {
 
     $linker_table = $base_table . '_dbxref';
     $options = ['return_array' => 1];
-    $record = chado_expand_var($record, 'table', $linker_table, $options);
-    if (property_exists($record, $linker_table) and is_array($record->$linker_table) and count($record->$linker_table) > 0) {
-      $i = 0;
-      foreach ($record->$linker_table as $index => $linker) {
-        $dbxref = $linker->dbxref_id;
-
-        // Ignore the GFF_source database. This is a weird thing required by
-        // GBrowse and is added by the GFF loader. We don't want to show it.
-        if ($dbxref->db_id->name == 'GFF_source') {
-          continue;
-        }
-
-        $URL = chado_get_dbxref_url($dbxref);
-        $entity->{$field_name}['und'][$i] = [
-          'value' => [
-            $dbname_term => $dbxref->db_id->name,
-            $accession_term => $dbxref->accession,
-            $dburl_term => $URL,
-          ],
-          'chado-' . $field_table . '__' . $pkey => $linker->$pkey,
-          'chado-' . $field_table . '__' . $fkey_lcolumn => $linker->$fkey_lcolumn->$fkey_lcolumn,
-          'chado-' . $field_table . '__dbxref_id' => $dbxref->dbxref_id,
-          'db_id' => $dbxref->db_id->db_id,
-          'accession' => $dbxref->accession,
-        ];
-        $i++;
+
+    // Build the SQL to find the cross references linked to this record.
+    $max_items = array_key_exists('max_items', $this->instance['settings']) ? $this->instance['settings']['max_items'] : 10000;
+    $sql = "SELECT REF.accession, DB.name, DB.urlprefix "
+         . "FROM {".$linker_table."} LINK "
+         . "INNER JOIN {dbxref} REF on LINK.dbxref_id = REF.dbxref_id "
+         . "INNER JOIN {db} DB on REF.db_id = DB.db_id "
+         . "WHERE LINK.$fkey_lcolumn = :id "
+         // Ignore the GFF_source database. This is a weird thing required by
+         // GBrowse and is added by the GFF loader. We don't want to show it.
+         . "AND NOT DB.name = 'GFF_source' "
+         . "ORDER BY REF.accession "  // if we hit the limit, the subset should be consistent
+         . "LIMIT :limit";
+    $args = [':id' => $entity->{'chado_record_id'},
+             ':limit' => $max_items + 1];
+    $records = chado_query($sql, $args);
+
+    // Store the query results
+    $delta = 0;
+    while($record = $records->fetchObject()) {
+      // Need this check to detect the case where the limit exactly equals the number of records
+      if ($delta < $max_items) {
+        $entity->{$field_name}['und'][$delta] = [];
+        $entity->{$field_name}['und'][$delta]['value'][$dbname_term] = $record->name;
+        $entity->{$field_name}['und'][$delta]['value'][$accession_term] = $record->accession;
+        $entity->{$field_name}['und'][$delta]['value'][$dburl_term] = $record->urlprefix;
       }
+      $delta++;
+    }
+    // Display a warning if we have exceeded the maximum number of cross references
+    if ( $delta > $max_items ) {
+      $entity->{$field_name}['und'][$delta]['value'][$dbname_term] = 'Note';
+      $entity->{$field_name}['und'][$delta]['value'][$accession_term] = "Only the first $max_items cross references are shown";
     }
   }
 

+ 76 - 15
tripal_chado/includes/TripalFields/sbo__database_cross_reference/sbo__database_cross_reference_formatter.inc

@@ -15,6 +15,11 @@ class sbo__database_cross_reference_formatter extends ChadoFieldFormatter {
   public function view(&$element, $entity_type, $entity, $langcode, $items, $display) {
     $content = '';
 
+    // Do we have an empty list? If so, just return.
+    if (!$items[0]['value']) {
+      return;
+    }
+
     $field_name = $this->field['field_name'];
     $field_type = $this->field['type'];
     $field_table = $this->instance['settings']['chado_table'];
@@ -26,27 +31,83 @@ class sbo__database_cross_reference_formatter extends ChadoFieldFormatter {
     $accession_term = chado_get_semweb_term('dbxref', 'accession');
     $dburl_term = chado_get_semweb_term('db', 'url');
 
+    // First, organize the values by their databases.
+    $ordered_items = [];
     foreach ($items as $delta => $item) {
-      if (!$item['value']) {
-        continue;
+      $db = $item['value'][$dbname_term];
+      $accession = $item['value'][$accession_term];
+      // It is possible that a database does not have a url, in which case no link can be generated.
+      if (array_key_exists($dburl_term, $item['value']) and $item['value'][$dburl_term]) {
+        $url = $item['value'][$dburl_term];
+
+        // This emulates chado_get_dbxref_url() but implemented inline here for better performance.
+        $db_count = 0;
+        $acc_count = 0;
+        $url = preg_replace('/\{db\}/', $db, $url, -1, $acc_count);
+        $url = preg_replace('/\{accession\}/', $accession, $url, -1, $acc_count);
+
+        $content = l($accession, $url, ['attributes' => ['target' => '_blank']]);
       }
-      $content = $item['value'][$dbname_term] . ':' . $item['value'][$accession_term];
-      if ($item['value'][$dburl_term]) {
-        $dbxref = chado_get_dbxref(['dbxref_id' => $item['chado-' . $linker_table . '__dbxref_id']]);
-        $url = chado_get_dbxref_url($dbxref);
-        $content = l($content, $url, ['attributes' => ['target' => '_blank']]);
+      else {
+        $content = $accession;
       }
-      $element[$delta] = [
-        '#type' => 'markup',
-        '#markup' => $content,
-      ];
+      $ordered_items[ucfirst($db)][] = $content;
     }
 
-    if (count($element) == 0) {
-      $element[0] = [
-        '#type' => 'markup',
-        '#markup' => 'There are no cross references.',
+    // Reorder the list so it's compatible with theming a list.
+    ksort($ordered_items);
+
+    // Generate the pagers for each type.
+    $list_items = [];
+    $headers = [];
+    $rows = [];
+    foreach ($ordered_items as $type => $children) {
+      $items_per_page = array_key_exists('items_per_page', $this->instance['settings']) ? $this->instance['settings']['items_per_page'] : 10;
+      $total_records = count($children);
+      $total_pages = (int) ($total_records / $items_per_page) + 1;
+      $pelement = 0;
+      $current_page = pager_default_initialize($total_records, $items_per_page, $pelement);
+      $pager = theme('pager', [
+        'tags' => [],
+        'element' => $pelement,
+        'parameters' => [],
+        'quantity' => 5,
+      ]);
+      $pager = $this->ajaxifyPager($pager, $entity);
+      $page_items = array_chunk($children, $items_per_page);
+
+      $rows[] = [
+        [
+          'data' => ucfirst($type),
+          'header' => TRUE,
+          'width' => '20%',
+        ],
+        theme_item_list([
+          'items' => $page_items[$current_page],
+          'title' => '',
+          'type' => 'ul',
+          'attributes' => [],
+        ]) . $pager,
       ];
     }
+
+    $table = [
+      'header' => [],
+      'rows' => $rows,
+      'attributes' => [
+        'id' => 'sbo__database_cross_reference',
+        'class' => 'tripal-data-table',
+      ],
+      'sticky' => FALSE,
+      'caption' => "",
+      'colgroups' => [],
+      'empty' => 'There are no cross references.',
+    ];
+    $content = theme_table($table);
+    $element[0] = [
+      '#type' => 'markup',
+      '#markup' => $content,
+    ];
+
   }
 }

+ 8 - 5
tripal_chado/includes/TripalFields/sbo__relationship/sbo__relationship.inc

@@ -433,14 +433,15 @@ class sbo__relationship extends ChadoField {
     $object_type = $entity->{$field_name}['und'][$delta]['value']['local:relationship_object']['rdfs:type'];
     $object_name = $entity->{$field_name}['und'][$delta]['value']['local:relationship_object']['schema:name'];
 
-
     // Remember the current entity could be either the subject or object!
     // Example: The genetic_marker, MARKER1 , derives from the sequence_variant, VARIANT1.
     // The above relationship will be shown both on marker and variant pages
     // and as such both subject and object names need to be shown.
+    // Store the clause as plain text here; the formatter rebuilds it so it
+    // can apply formatting and links.
     $clause = 'The ' . $subject_type . ', ' .
-      $subject_name . ', ' . $verb . ' ' . $rel_type_clean . ' ' .
-      $object_type . ', ' . $object_name . '.';
+        $subject_name . ', ' . $verb . ' ' . $rel_type_clean . ' ' .
+        $object_type . ', ' . $object_name . '.';
     $entity->{$field_name}['und'][$delta]['value']['SIO:000493'] = $clause;
 
     // Adding a label allows us to provide a single text value for the
@@ -524,7 +525,7 @@ class sbo__relationship extends ChadoField {
       // relationship type and the object and subject
       'include_fk' => [
         'type_id' => 1,
-          $object_id_key => [
+        $object_id_key => [
           'type_id' => 1,
           'organism_id' => 1,
         ],
@@ -645,7 +646,7 @@ class sbo__relationship extends ChadoField {
    * @return
    *   The verb to use when creating a sentence of the relationship.
    */
-  private function get_rel_verb($rel_type) {
+  public static function get_rel_verb($rel_type) {
     $rel_type_clean = lcfirst(preg_replace('/_/', ' ', $rel_type));
     $verb = '';
     switch ($rel_type_clean) {
@@ -658,6 +659,7 @@ class sbo__relationship extends ChadoField {
       case 'genome of':
       case 'part of':
         $verb = 'is a';
+        break;
       case 'position of':
       case 'sequence of':
       case 'variant of':
@@ -674,6 +676,7 @@ class sbo__relationship extends ChadoField {
       case 'is a':
       case 'is a maternal parent of':
       case 'is a paternal parent of':
+      case 'is a subproject of':
       case 'is consecutive sequence of':
       case 'maximally overlaps':
       case 'overlaps':

+ 21 - 13
tripal_chado/includes/TripalFields/sbo__relationship/sbo__relationship_formatter.inc

@@ -59,6 +59,9 @@ class sbo__relationship_formatter extends ChadoFieldFormatter {
    */
   public function view(&$element, $entity_type, $entity, $langcode, $items, $display) {
 
+    // Load the Field class into scope for use below.
+    tripal_load_include_field_class('sbo__relationship');
+
     // Get the settings
     $settings = $display['settings'];
     $rows = [];
@@ -72,7 +75,11 @@ class sbo__relationship_formatter extends ChadoFieldFormatter {
       $subject_type = $item['value']['local:relationship_subject']['rdfs:type'];
       $object_name = $item['value']['local:relationship_object']['schema:name'];
       $object_type = $item['value']['local:relationship_object']['rdfs:type'];
-      $phrase = $item['value']['SIO:000493'];
+      $rel_type = $item['value']['local:relationship_type'];
+
+      // Get some better human-readable formats for the type and verb.
+      $verb = sbo__relationship::get_rel_verb($rel_type);
+      $rel_type_clean = lcfirst(preg_replace('/_/', ' ', $rel_type));
 
       // Handle some special cases.
       // For mRNA objects we don't want to show the CDS, exons, 5' UTR, etc.
@@ -87,31 +94,33 @@ class sbo__relationship_formatter extends ChadoFieldFormatter {
         continue;
       }
 
-      // Add bold font to the object and subject names.
-      // @todo add back in bolding...
-      // @todo Fix Current Bug: if type name is in the object name, weird bolding happens.
-      // $phrase = preg_replace("/$subject_type/", "<b>$subject_type</b>", $phrase);
-      // $phrase = preg_replace("/$object_type/", "<b>$object_type</b>", $phrase);
-
       // Convert the object/subject to a link if an entity exists for it.
       if (array_key_exists('entity', $item['value']['local:relationship_object'])) {
         list($entity_type, $object_entity_id) = explode(':', $item['value']['local:relationship_object']['entity']);
 
         if ($object_entity_id != $entity->id) {
-          $link = l($object_name, 'bio_data/' . $object_entity_id);
-          $phrase = str_replace($object_name, $link, $phrase);
+          $object_name = l($object_name, 'bio_data/' . $object_entity_id);
         }
       }
       if (array_key_exists('entity', $item['value']['local:relationship_subject'])) {
         list($entity_type, $subject_entity_id) = explode(':', $item['value']['local:relationship_subject']['entity']);
         if ($subject_entity_id != $entity->id) {
-          $link = l($subject_name, 'bio_data/' . $subject_entity_id);
-          $phrase = str_replace($subject_name, $link, $phrase);
+          $subject_name = l($subject_name, 'bio_data/' . $subject_entity_id);
         }
       }
 
+      // Add bold font to the subject and object names.
+      $subject_name = '<b>' . $subject_name . '</b>';
+      $object_name = '<b>' . $object_name . '</b>';
+
+      // The $clause text here will match what is already in $item['value']['SIO:000493']
+      // but we rebuild it here to incorporate links and formatting
+      $clause = 'The ' . $subject_type . ', ' .
+        $subject_name . ', ' . $verb . ' ' . $rel_type_clean . ' ' .
+        $object_type . ', ' . $object_name . '.';
+
       $rows[][] = [
-        'data' => $phrase,
+        'data' => $clause,
         //'class' => array('tripal-entity-unattached field-items')
       ];
     }
@@ -157,4 +166,3 @@ class sbo__relationship_formatter extends ChadoFieldFormatter {
   }
 }
 
-

+ 44 - 35
tripal_chado/includes/TripalImporter/GFF3Importer.inc

@@ -306,17 +306,6 @@ class GFF3Importer extends TripalImporter {
       '#required' => TRUE,
       '#options' => $organisms,
     ];
-    $form['create_organism'] = [
-      '#type' => 'checkbox',
-      '#title' => t('Create organism'),
-      '#required' => FALSE,
-      '#description' => t('The Tripal GFF loader supports the "organism" attribute. This allows features of a
-       different organism to be aligned to the landmark sequence.  The format of the
-       attribute is "organism=[genus]:[species]", where [genus] is the organism\'s genus and [species] is the
-       species name. Check this box to automatically add the organism to the database if it does not already exists.
-       Otherwise lines with an organism attribute where the organism is not present in the database will be skipped.'),
-    ];
-
     $form['landmark_type'] = [
       '#title' => t('Landmark Type'),
       '#type' => 'textfield',
@@ -325,6 +314,7 @@ class GFF3Importer extends TripalImporter {
        the landmark features (first column of the GFF3 file) are not already in the database.."),
     ];
 
+
     $form['proteins'] = [
       '#type' => 'fieldset',
       '#title' => t('Proteins'),
@@ -410,7 +400,16 @@ class GFF3Importer extends TripalImporter {
       '#collapsible' => TRUE,
       '#collapsed' => FALSE,
     ];
-
+    $form['advanced']['create_organism'] = [
+      '#type' => 'checkbox',
+      '#title' => t('Create organism'),
+      '#required' => FALSE,
+      '#description' => t('The Tripal GFF loader supports the "organism" attribute. This allows features of a
+       different organism to be aligned to the landmark sequence.  The format of the
+       attribute is "organism=[genus]:[species]", where [genus] is the organism\'s genus and [species] is the
+       species name. Check this box to automatically add the organism to the database if it does not already exists.
+       Otherwise lines with an organism attribute where the organism is not present in the database will be skipped.'),
+    ];
 
     $form['advanced']['line_number'] = [
       '#type' => 'textfield',
@@ -2033,7 +2032,7 @@ class GFF3Importer extends TripalImporter {
     $batch_num = 1;
     $sql = '';
     $args = [];
-    foreach ($this->parent_lookup as $parent => $children) {
+    foreach ($this->parent_lookup as $parent => $starts) {
       $total++;
       $i++;
 
@@ -2041,21 +2040,22 @@ class GFF3Importer extends TripalImporter {
       $parent_uniquename = $parent_feature['uniquename'];
       $parent_feature_id = $this->features[$parent_uniquename]['feature_id'];
       if (!$parent_feature['skipped']) {
-
-        foreach ($children as $child_findex) {
-          $j++;
-          $child_feature = $this->getCachedFeature($child_findex);
-          $child_uniquename = $child_feature['uniquename'];
-          $child_feature_id = $this->features[$child_uniquename]['feature_id'];
-          $type_id = $part_of;
-          if ($child_feature['type'] == 'polypeptide' or $child_feature['type'] == 'protein') {
-            $type_id = $derives_from;
+        foreach ($starts as $start => $children) {
+          foreach ($children as $child_findex) {
+            $j++;
+            $child_feature = $this->getCachedFeature($child_findex);
+            $child_uniquename = $child_feature['uniquename'];
+            $child_feature_id = $this->features[$child_uniquename]['feature_id'];
+            $type_id = $part_of;
+            if ($child_feature['type'] == 'polypeptide' or $child_feature['type'] == 'protein') {
+              $type_id = $derives_from;
+            }
+            $sql .= "(:subject_id_$j, :object_id_$j, :type_id_$j, :rank_$j),\n";
+            $args[":subject_id_$j"] = $child_feature_id;
+            $args[":object_id_$j"] = $parent_feature_id;
+            $args[":type_id_$j"] = $type_id;
+            $args[":rank_$j"] = $this->features[$child_uniquename]['rank'];
           }
-          $sql .= "(:subject_id_$j, :object_id_$j, :type_id_$j, :rank_$j),\n";
-          $args[":subject_id_$j"] = $child_feature_id;
-          $args[":object_id_$j"] = $parent_feature_id;
-          $args[":type_id_$j"] = $type_id;
-          $args[":rank_$j"] = $this->features[$child_uniquename]['rank'];
         }
       }
 
@@ -2151,7 +2151,15 @@ class GFF3Importer extends TripalImporter {
         // Place features in order that they appear by their start coordinates.
         $parent = $feature['parent'];
         $start = $feature['start'];
-        $this->parent_lookup[$parent][$start] = $info['findex'];
+        // We can have multiple children that start at the same location
+        // so we'll store children in an array indexed by start position.
+        if (!array_key_exists($parent, $this->parent_lookup)) {
+          $this->parent_lookup[$parent] = [];
+        }
+        if (!array_key_exists($start, $this->parent_lookup[$parent])) {
+          $this->parent_lookup[$parent][$start] = [];
+        }
+        $this->parent_lookup[$parent][$start][] = $info['findex'];
       }
       $this->setItemsHandled($i);
     }
@@ -2168,15 +2176,16 @@ class GFF3Importer extends TripalImporter {
     $this->setItemsHandled(0);
     $this->setTotalItems(count(array_keys($this->parent_lookup)));
     $i = 0;
-    foreach ($this->parent_lookup as $parent => $children) {
-      $starts = array_keys($children);
+    foreach ($this->parent_lookup as $parent => $starts) {
+      $starts = array_keys($starts);
       sort($starts);
       $j = 0;
       foreach ($starts as $start) {
-        $child_findex = $children[$start];
-        $child = $this->getCachedFeature($child_findex);
-        $this->features[$child['uniquename']]['rank'] = $j;
-        $j++;
+        foreach ($this->parent_lookup[$parent][$start] as $child_findex) {
+          $child = $this->getCachedFeature($child_findex);
+          $this->features[$child['uniquename']]['rank'] = $j;
+          $j++;
+        }
       }
       $this->setItemsHandled($j);
     }
@@ -2593,7 +2602,7 @@ class GFF3Importer extends TripalImporter {
     }
 
     // Get the organism object.
-    [$genus, $species] = explode(':', $organism_attr, 2);
+    list($genus, $species) = explode(':', $organism_attr, 2);
     $organism = new ChadoRecord('organism');
     $organism->setValues([
       'genus' => $genus,

+ 10 - 0
tripal_chado/includes/tripal_chado.field_storage.inc

@@ -398,6 +398,16 @@ function tripal_chado_field_storage_load($entity_type, $entities, $age,
             // as the column value.
             $entity->{$field_name}['und'][0]['value'] = $record->$field_column;
             $entity->{$field_name}['und'][0]['chado-' . $field_table . '__' . $field_column] = $record->$field_column;
+
+            // For datetime columns we need to strip out the milliseconds if
+            // they are there because the Drupal date field can't deal with
+            // milliseconds and causes the displayed time to revert to the
+            // current time.
+            if ($field_type == 'datetime') {
+              $datevalue = $entity->{$field_name}['und'][0]['value'];
+              $datevalue = preg_replace('/^(\d+-\d+-\d+ \d+:\d+:\d+)\.\d+$/', '\1', $datevalue);
+              $entity->{$field_name}['und'][0]['value'] = $datevalue;
+            }
           }
         }
 

+ 11 - 0
tripal_chado/includes/tripal_chado.semweb.inc

@@ -1457,6 +1457,17 @@ function tripal_chado_populate_vocab_LOCAL() {
     'db_name' => 'local',
   ]);
 
+  //--------
+  // Feature
+  //--------
+  $term = chado_insert_cvterm([
+    'name' => 'fasta_definition',
+    'definition' => 'The definition line for a FASTA formatted sequence',
+    'cv_name' => 'local',
+    'is_relationship' => 0,
+    'db_name' => 'local',
+  ]);
+
   //--------------
   // Feature Map
   //--------------

+ 21 - 2
tripal_chado/tripal_chado.install

@@ -2081,7 +2081,7 @@ function tripal_chado_update_7338() {
  */
 function tripal_chado_update_7339() {
   try {
-    $term = chado_insert_cvterm([
+    chado_insert_cvterm([
       'id' => 'data:0849',
       'name' => 'Sequence record',
       'cv_name' => 'EDAM',
@@ -2092,4 +2092,23 @@ function tripal_chado_update_7339() {
     $error = $e->getMessage();
     throw new DrupalUpdateException('Could not perform update: '. $error);
   }
-}
+}
+
+
+/**
+ * Adds the "FASTA definition" cvterm for the data__sequence_record field.
+ */
+function tripal_chado_update_7340() {
+  try {
+    chado_insert_cvterm([
+      'name' => 'fasta_definition',
+      'definition' => 'The definition line for a FASTA formatted sequence',
+      'cv_name' => 'local',
+      'is_relationship' => 0,
+      'db_name' => 'local',
+    ]);
+  } catch (\PDOException $e) {
+    $error = $e->getMessage();
+    throw new DrupalUpdateException('Could not perform update: '. $error);
+  }
+}

+ 1 - 1
tripal_ds/api/tripal_ds.pane.api.inc

@@ -39,7 +39,7 @@ function tripal_ds_create_field($field_label, $field_name, $bundle_name) {
   //Build the rest of the passed parameters.
   $group_field_name = 'gp_' . $field_name;
   //Create the field groups.
-  tripal_ds_additional_fields_field_group_info($bundle_name, $field_label, $group_field_name, $field_name);
+  tripal_ds_add_generic_field_group($bundle_name, $field_label, $group_field_name, $field_name);
   //Place the field groups in the layout.
   tripal_ds_update_ds_layout($bundle_name, $field_name, $group_field_name);
 }

+ 80 - 109
tripal_ds/includes/tripal_ds.ds.inc

@@ -41,117 +41,113 @@ function _ds_layout_settings_info($bundle_name, $instances) {
   $region_left = [];
   $prop_fields = [];
   $summary_fields = [];
-  $data_sequence_fields = [];
-  $all_other_fields = [];
+  $unhandled_fields = [];
   $fields_with_regions = [];
   $i = 0;
-  $all_fields = [];
+
 
   try {
+
     // Get the bundle and term objects.
     $bundle = tripal_load_bundle_entity(['name' => $bundle_name]);
     $term = tripal_load_term_entity(['term_id' => $bundle->term_id]);
 
-    // Build one large multidimensional array of all instances to sort in alpha
-    // order to display fields in label alpha order.
+    // Get the list of fields and sort them by label.
+    $sorted_instances = [];
     foreach ($instances as $key => $instance) {
-      $all_fields[$i] = $instance;
+      $sorted_instances[$i] = $instance;
       $i++;
     }
-    usort($all_fields, tripal_ds_sort_array('label'));
+    usort($sorted_instances, tripal_ds_sort_array('label'));
 
-    // Iterate through the fields of this bundle.
-    foreach ($all_fields as $key => $instance) {
+    // Iterate through the fields of this bundle and put them into appropriate
+    // field groups.
+    foreach ($sorted_instances as $key => $instance) {
+      $base_table = array_key_exists('base_table', $instance['settings']) ? $instance['settings']['base_table'] : '';
+      $chado_table = array_key_exists('chado_table', $instance['settings']) ? $instance['settings']['chado_table'] : '';
       $instance_name = $instance['field_name'];
+
+      // Add the bundle type to the summary fields.
       if ($instance_name == "rdfs__type") {
         array_push($summary_fields, $instance_name);
+        continue;
       }
-      else {
-        //TODO: How do we handle non-chado dbs, placement of fields within
-        // tripal panes might need to be done in a hook.
-        $instance_base_table = array_key_exists('base_table', $instance['settings']) ? $instance['settings']['base_table'] : '';
-        $instance_base_chado = array_key_exists('chado_table', $instance['settings']) ? $instance['settings']['chado_table'] : '';
-        $prop_table = strpos($instance_base_chado, 'prop');
-        $data_sequence_record = strpos($instance_name, 'data__sequence_record');
-        $data_sequence = strpos($instance_name, 'data__sequence');
-        if ($instance_base_chado && $instance_base_table) {
-
-          if ($instance_base_chado == $instance_base_table) {
-            if ($data_sequence_record !== FALSE) {
-              array_push($data_sequence_fields, $instance_name);
-            }
-            elseif ($data_sequence !== FALSE or $instance_name == 'so__cds' or $instance_name == 'data__protein_sequence') {
-              // Do nothing as these chado column sequence fields
-              // should be hidden by default.
-            }
-            elseif ($prop_table !== FALSE) {
-              array_push($prop_fields, $instance_name);
-            }
-            else {
-              array_push($summary_fields, $instance_name);
-            }
 
-          }
-          elseif ($instance_base_chado != $instance_base_table) {
-            if ($data_sequence_record !== FALSE) {
-              array_push($data_sequence_fields, $instance_name);
-            }
-            elseif ($data_sequence !== FALSE or $instance_name == 'so__cds' or $instance_name == 'data__protein_sequence') {
-              // Do nothing as these chado column sequence fields
-              // should be hidden by default.
-            }
-            elseif ($prop_table !== FALSE) {
-              array_push($prop_fields, $instance_name);
-            }
-            else {
-              array_push($all_other_fields, $instance);
+      // The tripal_chado module adds an image to the organism content
+      // type so we want to make sure that image goes in the summary.
+      // It is not a TripalField so it won't have a chado table.
+      if ($instance_name == 'data__image' and $term->name == 'organism') {
+        array_push($summary_fields, $instance_name);
+        continue;
+      }
 
-              // Update the display settings so that the title is hidden.
-              $instance['display']['default']['label'] = 'hidden';
-              field_update_instance($instance);
-            }
-          }
-        }
-        else {
-          // The tripal_chado module adds an image to the organism content
-          // type so we want to make sure that image goes in the summary.
-          // It is not a TripalField so it won't have a chado table.
-          if ($instance_name == 'data__image' and $term->name == 'organism') {
-            array_push($summary_fields, $instance_name);
-          }
+      // For fields that have a data__sequence prefix we want to hide these
+      // unless it's the data__sequence_record field which displays all of the
+      // sequence data and should be in its own fieldset.
+      if (preg_match('/data__sequence/', $instance_name)) {
+        if ($instance_name == 'data__sequence_record') {
+          $instance['display']['default']['label'] = 'hidden';
+          field_update_instance($instance);
+          array_push($unhandled_fields, $instance);
         }
+        continue;
+      }
+
+      // For the same reason, hide the CDS and protein fields
+      if ($instance_name == 'so__cds' or $instance_name == 'data__protein_sequence'){
+        continue;
+      }
+
+      // If this field holds a value from the prop table then add it to the
+      // properties field group.
+      if (preg_match('/_prop$/', $chado_table)) {
+        array_push($prop_fields, $instance_name);
+        continue;
       }
-    }
 
-    // Consolidate the field sets.
+      // Put all fields from the base table in the summary and all other
+      // fields that were not handled above into a different array for
+      // handling later.
+      if ($base_table == $chado_table) {
+        array_push($summary_fields, $instance_name);
+        continue;
+      }
+      else {
+        // Update the display settings so that the title is hidden.
+        $instance['display']['default']['label'] = 'hidden';
+        field_update_instance($instance);
+        array_push($unhandled_fields, $instance);
+      }
+    } // end looping over fields.
+
+    // Create the summary and properties field groups.
     if (!empty($summary_fields)) {
-      _summary_field_group_info($bundle_name, $summary_fields);
+      tripal_ds_add_summary_field_group($bundle_name, $summary_fields);
     }
     if (!empty($prop_fields)) {
-      _prop_field_group_info($bundle_name, $prop_fields);
-    }
-    if (!empty($data_sequence_fields)) {
-      _data_sequence_field_group_info($bundle_name, $data_sequence_fields);
+      tripal_ds_add_prop_field_group($bundle_name, $prop_fields);
     }
-    if (!empty($all_other_fields)) {
-      foreach ($all_other_fields as $key => $other_field) {
-        $group_field_name = 'gp_' . $other_field['field_name'];
-
-        // Need to truncate the names because of database field size restrictions,
-        // updating fields here to ensure name consistency.
-        $group_field_name = substr($group_field_name, 0, 27);
-
-        // Add random numbers to ensure the field name is unique within the 32
-        // character limit of the field.
-        $group_field_name = $group_field_name . rand(0, 99999);
-        tripal_ds_additional_fields_field_group_info($bundle_name, $other_field['label'], $group_field_name, $other_field['field_name']);
-      }
+
+    // All other fields get their own field group (i.e. Tripal Pane).
+    foreach ($unhandled_fields as $key => $other_field) {
+      $group_field_name = 'gp_' . $other_field['field_name'];
+
+      // Need to truncate the names because of database field size restrictions,
+      // updating fields here to ensure name consistency.
+      $group_field_name = substr($group_field_name, 0, 27);
+
+      // Add random numbers to ensure the field name is unique within the 32
+      // character limit of the field.
+      $group_field_name = $group_field_name . rand(0, 99999);
+
+      // Now add a generic field group for this field.
+      tripal_ds_add_generic_field_group($bundle_name, $other_field['label'], $group_field_name, $other_field['field_name']);
     }
 
-    // Build one large multidimensional array of all instances to sort in alpha
-    // order to display fields in label alpha order.
+    // Build one large multidimensional array of all field groups to sort
+    // in alpha order to display fields in label alpha order.
     $right_fields = [];
-    $all_field_groups = field_group_info_groups('TripalEntity', $bundle_name);
+    $all_field_groups = field_group_info_groups('TripalEntity', $bundle_name, NULL, TRUE);
 
     if (!empty($all_field_groups)) {
       if (is_array($all_field_groups)) {
@@ -164,31 +160,6 @@ function _ds_layout_settings_info($bundle_name, $instances) {
         usort($right_fields, tripal_ds_sort_object('label'));
       }
     }
-    elseif (empty($all_field_groups)) {
-      //Add the original instances that were passed and the field_groups that
-      //were created.
-      $field_group_fields = db_select('field_group', 'fg')
-        ->fields('fg', ['group_name', 'data'])
-        ->condition('bundle', $bundle_name, '=')
-        ->execute()
-        ->fetchAll();
-
-      $instance_names = [];
-      $field_group_names = [];
-      foreach ($all_fields as $key => $instance) {
-        $instance_names[$key]['field_name'] = $instance['field_name'];
-        $instance_names[$key]['label'] = $instance['label'];
-
-      }
-      foreach ($field_group_fields as $key => $field_group_name) {
-        $data = unserialize($field_group_name->data);
-        $field_group_names[$key]['field_name'] = $field_group_name->group_name;
-        $field_group_names[$key]['label'] = $data['format_settings']['label'];
-
-      }
-      $all_field_groups = array_merge($instance_names, $field_group_names);
-      usort($all_field_groups, tripal_ds_sort_array('label'));
-    }
 
     // Now build the $region_right array and the fields array.
     $i = 0;
@@ -337,7 +308,7 @@ function _ds_layout_pub_settings_info($bundle_name, $instances) {
       $group_field_name = $group_field_name . rand(0, 99999);
 
       // Build the field group.
-      tripal_ds_additional_fields_field_group_info($bundle_name, $other_field['label'], $group_field_name, $other_field['field_name']);
+      tripal_ds_add_generic_field_group($bundle_name, $other_field['label'], $group_field_name, $other_field['field_name']);
 
       // Update arrays.
       array_push($temporary_field, $group_field_name, $other_field['field_name']);

+ 5 - 5
tripal_ds/includes/tripal_ds.field_group.inc

@@ -7,7 +7,7 @@
  * @param $fields
  *  Array of the machine names of the children of the field group being created.
  */
-function _summary_field_group_info($bundle_name, $fields) {
+function tripal_ds_add_summary_field_group($bundle_name, $fields) {
   //Tripal pane to nest the summary fieldset within.
   $field_group_tripalpane = new stdClass();
   $field_group_tripalpane->disabled = FALSE; /* Edit this to true to make a default field_group disabled initially*/
@@ -80,7 +80,7 @@ function _summary_field_group_info($bundle_name, $fields) {
  * @param $fields
  *  Array of the machine names of the children of the field group being created.
  */
-function _prop_field_group_info($bundle_name, $fields) {
+function tripal_ds_add_prop_field_group($bundle_name, $fields) {
   //Tripal pane  to nest the fieldset within.
   $field_group_tripalpane = new stdClass();
   $field_group_tripalpane->disabled = FALSE; /* Edit this to true to make a default field_group disabled initially*/
@@ -152,7 +152,7 @@ function _prop_field_group_info($bundle_name, $fields) {
  * @param $fields
  *  Array of the machine names of the children of the field group being created.
  */
-function _data_sequence_field_group_info($bundle_name, $fields) {
+function tripal_ds_add_sequences_field_group($bundle_name, $fields) {
   //Tripal pane  to nest the fieldset within.
   $field_group_tripalpane = new stdClass();
   $field_group_tripalpane->disabled = FALSE; /* Edit this to true to make a default field_group disabled initially*/
@@ -182,7 +182,7 @@ function _data_sequence_field_group_info($bundle_name, $fields) {
   ];
   drupal_write_record('field_group', $field_group_tripalpane);
   //Write to the tripal_ds table to record the new tripal pane.
-  tripal_ds_bundle_menu_item($bundle_name, 'Sequence', 'group_sequence_tripalpane', 'TripalEntity');
+  tripal_ds_bundle_menu_item($bundle_name, 'Sequences', 'group_sequence_tripalpane', 'TripalEntity');
 
   //Table of fields.
   $field_group = new stdClass();
@@ -230,7 +230,7 @@ function _data_sequence_field_group_info($bundle_name, $fields) {
  * @param $field_name
  *  Machine name of the child element.
  */
-function tripal_ds_additional_fields_field_group_info($bundle_name, $field_label, $group_field_name, $field_name) {
+function tripal_ds_add_generic_field_group($bundle_name, $field_label, $group_field_name, $field_name) {
   //Write to the tripal_ds table to record the new tripal pane.
   tripal_ds_bundle_menu_item($bundle_name, $field_label, $group_field_name, 'TripalEntity');