Browse Source

Merge branch 'gff3_performance' of git://github.com/par12005/tripal into par12005-gff3_performance

Stephen Ficklin 4 years ago
parent
commit
9469d3c61d

+ 2 - 0
tests/tripal_chado/data/short_scaffold.fasta

@@ -0,0 +1,2 @@
+>scaffold1
+CAACAAGAAGTAAGCATAGGTTAATTATCATCCACGCATATTAATCAAGAATCGATGCTCGATTAATGTTTTTGAATTGACAAACAAAAGTTTTGTAAAAAGGACTTGTTGGTGGTGGTGGGGTGGTGGTGATGGTGTGGTGGGTAGGTCGCTGGTCGTCGCCGGCGTGGTGGAAGTCTCGCTGGCCGGTGTCTCGGCGGTCTGGTGGCGGCTGGTGGCGGTAGTTGTGAGTTTTTTCTTTCTTTTTTTGTTTTTTTTTTTTACTTTTTACTTTTTTTTCGTCTTGAACAAATTAAAAATAGAGTTTGTTTGTATTTGGTTATTATTTATTGATAAGGGTATATTCGTCCTGTTTGGTCTTGATGTAATAAAATTAAATTAATTTACGGGCTTCAACTAATAAACTCCTTCATGTTGGTTTGAACTAATAAAAAAAGGGGAAATTTGCTAGACACCCCTAATTTTGGACTTATATGGGTAGAAGTCCTAGTTGCTAGATGAATATAGGCCTAGGTCCATCCACATAAAAAAATAATATAAATTAAATAATAAAAATAATATATAGACATAAGTACCCTTATTGAATAAACATATTTTAGGGGATTCAGTTATATACGTAAAGTTGGGAAATCAAATCCCACTAATCACGATTGAAGGCAGAGTATCGTGTAAGACGTTTGGAAAACATATCTTAGTCGATTCCAGTGGAATATGAGATCA

+ 5 - 0
tests/tripal_chado/data/small_gene.gff

@@ -0,0 +1,5 @@
+##gff-version 3
+scaffold1	test_gene_001	gene	100	200	.	+	.	ID=test_gene_001;Name=test_gene_001;biotype=protein_coding;Alias=first_test_gene;Dbxref=TEST_DB:test_gene_dbx_001;Ontology_term=SO:0000704;Target=scaffold1 100 200;target_type=supercontig;Gap=test_gap_1;Gap=test_gap_2;Note=test_gene_001_note
+scaffold1	test_mrna_001	mRNA	100	150	.	+	.	ID=test_mrna_001.1;Parent=test_gene_001;Name=test_mrna_001;biotype=protein_coding;AED=0.05
+scaffold1	test_protein_001	polypeptide	100	150	.	+	.	ID=test_protein_001.1;Parent=test_mrna_001.1
+scaffold1	test_gene_002	gene	300	400	.	+	.	ID=test_gene_002;Name=test_gene_002;biotype=protein_coding;Derives_from=test_gene_001

+ 335 - 2
tests/tripal_chado/loaders/GFF3ImporterTest.php

@@ -48,6 +48,337 @@ class GFF3ImporterTest extends TripalTestCase {
     $this->assertEquals($name, $query);
   }
 
+  /**
+   * Runs the GFF loader on small_gene.gff and caches values the tests need.
+   *
+   * This gff has many attributes that are checked by the
+   * testGFFImporterAttribute*() methods. Landmarks are loaded from
+   * short_scaffold.fasta first, then the GFF itself. Afterwards the created
+   * organism and analysis, the 'gene' and 'mRNA' cvterm IDs from the
+   * 'sequence' vocabulary, and the two gene uniquenames are stored on $this.
+   */
+  private function initGFFImporterAttributes() {
+    $gff = ['file_local' => __DIR__ . '/../data/small_gene.gff'];
+    $fasta = ['file_local' => __DIR__ . '/../data/short_scaffold.fasta'];
+    $analysis = factory('chado.analysis')->create();
+    $organism = factory('chado.organism')->create();
+    $run_args = [
+      'analysis_id' => $analysis->analysis_id,
+      'organism_id' => $organism->organism_id,
+      'use_transaction' => 1,
+      'add_only' => 0,
+      'update' => 1,
+      'create_organism' => 0,
+      'create_target' => 0,
+      // Regexps for mRNA and protein.
+      're_mrna' => NULL,
+      're_protein' => NULL,
+      // The importer reads this argument unconditionally, so pass an explicit
+      // value rather than leaving the key undefined.
+      'skip_protein' => 0,
+      // Optional.
+      'target_organism_id' => $organism->organism_id,
+      'target_type' => NULL,
+      'start_line' => NULL,
+      'landmark_type' => NULL,
+      'alt_id_attr' => NULL,
+    ];
+    $this->loadLandmarks($analysis, $organism, $fasta);
+    $this->runGFFLoader($run_args, $gff);
+    $this->organism = $organism;
+    $this->analysis = $analysis;
+    $this->gene_cvt = chado_get_cvterm(array(
+      'name' => 'gene',
+      'cv_id' => array(
+        'name' => 'sequence',
+      ),
+    ))->cvterm_id;
+    $this->mrna_cvt = chado_get_cvterm(array(
+      'name' => 'mRNA',
+      'cv_id' => array(
+        'name' => 'sequence',
+      ),
+    ))->cvterm_id;
+    $this->gene_1_uname = 'test_gene_001';
+    $this->gene_2_uname = 'test_gene_002';
+  }
+
+  /**
+   * Verifies the core feature row written for the first test gene.
+   *
+   * Checks the uniquename, name, organism and type of the loaded gene.
+   *
+   * @group gff
+   */
+  public function testGFFImporterAttributeFeature() {
+    $this->initGFFImporterAttributes();
+
+    // Look the gene up by its uniquename and feature type.
+    $select = db_select('chado.feature', 'f');
+    $select->fields('f');
+    $select->condition('uniquename', $this->gene_1_uname);
+    $select->condition('type_id', $this->gene_cvt);
+    $feature = $select->execute()->fetchObject();
+
+    $expected_organism_id = $this->organism->organism_id;
+
+    $this->assertEquals('test_gene_001', $feature->uniquename);
+    $this->assertEquals('test_gene_001', $feature->name);
+    $this->assertEquals($expected_organism_id, $feature->organism_id);
+    $this->assertEquals($this->gene_cvt, $feature->type_id);
+  }
+
+  /**
+   * Verifies that the Alias attribute ends up as a synonym of the gene.
+   *
+   * @group gff
+   */
+  public function testGFFImporterAttributeAlias() {
+    $this->initGFFImporterAttributes();
+    $expected_alias = 'first_test_gene';
+
+    // Fetch the gene the alias was declared on.
+    $feature_select = db_select('chado.feature', 'f');
+    $feature_select->fields('f');
+    $feature_select->condition('uniquename', $this->gene_1_uname);
+    $feature_select->condition('type_id', $this->gene_cvt);
+    $feature = $feature_select->execute()->fetchObject();
+
+    // Follow feature_synonym to the synonym record itself.
+    $synonym_select = db_select('chado.feature_synonym', 'fs');
+    $synonym_select->join('chado.synonym', 's', 's.synonym_id = fs.synonym_id');
+    $synonym_select->fields('s');
+    $synonym_select->condition('fs.feature_id', $feature->feature_id);
+    $synonym = $synonym_select->execute()->fetchObject();
+
+    $this->assertEquals($expected_alias, $synonym->name);
+  }
+
+  /**
+   * Verifies the database cross references attached to the first test gene.
+   *
+   * Two references are checked: the one named in the Dbxref attribute
+   * (TEST_DB) and the one stored under the 'GFF_source' database.
+   *
+   * @group gff
+   */
+  public function testGFFImporterAttributeDbxref() {
+    $this->initGFFImporterAttributes();
+    $expected_accession = 'test_gene_dbx_001';
+    $declared_db = chado_get_db(['name' => 'TEST_DB']);
+    $source_db = chado_get_db(['name' => 'GFF_source']);
+
+    $feature_select = db_select('chado.feature', 'f');
+    $feature_select->fields('f');
+    $feature_select->condition('uniquename', $this->gene_1_uname);
+    $feature_select->condition('type_id', $this->gene_cvt);
+    $feature = $feature_select->execute()->fetchObject();
+
+    // Shared base query: every dbxref linked to the gene.
+    $declared_select = db_select('chado.feature_dbxref', 'fdbx');
+    $declared_select->join('chado.dbxref', 'dbx', 'dbx.dbxref_id = fdbx.dbxref_id');
+    $declared_select->fields('dbx');
+    $declared_select->condition('fdbx.feature_id', $feature->feature_id);
+    // Clone before narrowing so each copy is filtered by its own database.
+    $source_select = clone $declared_select;
+
+    $declared_select->condition('dbx.db_id', $declared_db->db_id);
+    $declared_result = $declared_select->execute();
+
+    $source_select->condition('dbx.db_id', $source_db->db_id);
+    $source_result = $source_select->execute();
+
+    $declared_dbxref = $declared_result->fetchObject();
+    $source_dbxref = $source_result->fetchObject();
+
+    $this->assertEquals($expected_accession, $declared_dbxref->accession);
+    $this->assertEquals($this->gene_1_uname, $source_dbxref->accession);
+  }
+
+  /**
+   * Verifies the Ontology_term attribute is linked to the first test gene.
+   *
+   * @group gff
+   */
+  public function testGFFImporterAttributeOntology() {
+    $this->initGFFImporterAttributes();
+
+    $feature_select = db_select('chado.feature', 'f');
+    $feature_select->fields('f');
+    $feature_select->condition('uniquename', $this->gene_1_uname);
+    $feature_select->condition('type_id', $this->gene_cvt);
+    $feature = $feature_select->execute()->fetchObject();
+
+    // Resolve SO:0000704 to its cvterm through the dbxref.
+    $dbxref_filter = [
+      'accession' => '0000704',
+      'db_id' => [
+        'name' => 'SO',
+      ],
+    ];
+    $ontology_term = chado_get_cvterm(['dbxref_id' => $dbxref_filter]);
+
+    // Exactly one feature_cvterm row should join the gene to the term.
+    $link_select = db_select('chado.feature_cvterm', 'fcvt');
+    $link_select->fields('fcvt');
+    $link_select->condition('cvterm_id', $ontology_term->cvterm_id);
+    $link_select->condition('feature_id', $feature->feature_id);
+    $links = $link_select->execute();
+
+    $this->assertEquals(1, $links->rowCount());
+  }
+
+  /**
+   * Verifies the Parent attribute creates a part_of relationship.
+   *
+   * The mRNA is the subject of the relationship and the gene named in its
+   * Parent attribute is expected to be the object.
+   *
+   * @group gff
+   */
+  public function testGFFImporterAttributeParent() {
+    $this->initGFFImporterAttributes();
+
+    $part_of = chado_get_cvterm([
+      'name' => 'part_of',
+      'cv_id' => [
+        'name' => 'sequence',
+      ],
+    ]);
+    $part_of_id = $part_of->cvterm_id;
+
+    // The child feature whose parent is being checked.
+    $child_select = db_select('chado.feature', 'f');
+    $child_select->fields('f');
+    $child_select->condition('uniquename', 'test_mrna_001.1');
+    $child_select->condition('type_id', $this->mrna_cvt);
+    $child = $child_select->execute()->fetchObject();
+
+    // Walk the relationship from the child (subject) to its parent (object).
+    $parent_select = db_select('chado.feature_relationship', 'fr');
+    $parent_select->join('chado.feature', 'f', 'f.feature_id = fr.object_id');
+    $parent_select->fields('f');
+    $parent_select->condition('fr.subject_id', $child->feature_id);
+    $parent_select->condition('fr.type_id', $part_of_id);
+    $parent_feature = $parent_select->execute()->fetchObject();
+
+    $this->assertEquals('test_gene_001', $parent_feature->uniquename);
+    $this->assertEquals('test_gene_001', $parent_feature->name);
+    $this->assertEquals($this->gene_cvt, $parent_feature->type_id);
+    $this->assertEquals($this->organism->organism_id, $parent_feature->organism_id);
+  }
+
+  /**
+   * Verifies the location of the first test gene on the scaffold1 feature.
+   *
+   * @group gff
+   */
+  public function testGFFImporterAttributeTarget() {
+    $this->initGFFImporterAttributes();
+    $expected_fmin = 99;
+    $expected_fmax = 200;
+
+    $supercontig = chado_get_cvterm([
+      'name' => 'supercontig',
+      'cv_id' => [
+        'name' => 'sequence',
+      ],
+    ]);
+    $supercontig_id = $supercontig->cvterm_id;
+
+    // The feature the gene is located on.
+    $landmark_select = db_select('chado.feature', 'f');
+    $landmark_select->fields('f');
+    $landmark_select->condition('uniquename', 'scaffold1');
+    $landmark_select->condition('type_id', $supercontig_id);
+    $landmark = $landmark_select->execute()->fetchObject();
+
+    // The gene itself.
+    $gene_select = db_select('chado.feature', 'f');
+    $gene_select->fields('f');
+    $gene_select->condition('uniquename', $this->gene_1_uname);
+    $gene_select->condition('type_id', $this->gene_cvt);
+    $gene = $gene_select->execute()->fetchObject();
+
+    // The featureloc row tying the gene to the landmark.
+    $location_select = db_select('chado.featureloc', 'fl');
+    $location_select->fields('fl');
+    $location_select->condition('fl.feature_id', $gene->feature_id);
+    $location_select->condition('fl.srcfeature_id', $landmark->feature_id);
+    $location = $location_select->execute()->fetchObject();
+
+    $this->assertEquals($expected_fmin, $location->fmin);
+    $this->assertEquals($expected_fmax, $location->fmax);
+  }
+
+  /**
+   * Ensure properties loaded correctly into chado.
+   *
+   * Checks that both values of the repeated Gap attribute are stored as
+   * featureprop rows with ranks 0 and 1, and that the Note attribute is
+   * stored with rank 0.
+   *
+   * @group gff
+   */
+  public function testGFFImporterAttributeProperty() {
+    $this->initGFFImporterAttributes();
+    $gap_1 = 'test_gap_1';
+    $gap_2 = 'test_gap_2';
+    $note_val = 'test_gene_001_note';
+
+    $gene_1 = db_select('chado.feature', 'f')
+      ->fields('f')
+      ->condition('uniquename', $this->gene_1_uname)
+      ->condition('type_id', $this->gene_cvt)
+      ->execute()->fetchObject();
+
+    $gap_cvt = chado_get_cvterm(array(
+      'name' => 'Gap',
+      'cv_id' => array(
+        'name' => 'feature_property',
+      ),
+    ))->cvterm_id;
+
+    $note_cvt = chado_get_cvterm(array(
+      'name' => 'Note',
+      'cv_id' => array(
+        'name' => 'feature_property',
+      ),
+    ))->cvterm_id;
+
+    // Assert gaps loaded correctly.
+    $gaps_query = db_select('chado.featureprop', 'fp')
+      ->fields('fp')
+      ->condition('feature_id', $gene_1->feature_id)
+      ->condition('type_id', $gap_cvt)
+      ->execute();
+
+    // Index the rows by value. Start from an empty array so that a missing
+    // property produces an assertion failure rather than an undefined
+    // variable error.
+    $gaps = [];
+    while (($gap = $gaps_query->fetchObject())) {
+      $gaps[$gap->value] = $gap;
+    }
+
+    $this->assertArrayHasKey($gap_1, $gaps);
+    $this->assertArrayHasKey($gap_2, $gaps);
+    $this->assertEquals($gap_1, $gaps[$gap_1]->value);
+    $this->assertEquals(0, $gaps[$gap_1]->rank);
+    $this->assertEquals($gap_2, $gaps[$gap_2]->value);
+    $this->assertEquals(1, $gaps[$gap_2]->rank);
+
+    // Assert note loaded correctly.
+    $note = db_select('chado.featureprop', 'fp')
+      ->fields('fp')
+      ->condition('feature_id', $gene_1->feature_id)
+      ->condition('type_id', $note_cvt)
+      ->execute()->fetchObject();
+
+    $this->assertEquals($note_val, $note->value);
+    $this->assertEquals(0, $note->rank);
+  }
+
+  /**
+   * Verifies the Derives_from attribute creates a derives_from relationship.
+   *
+   * The second test gene is the subject and the first test gene is expected
+   * to be the object of the relationship.
+   *
+   * @group gff
+   */
+  public function testGFFImporterAttributeDerivesFrom() {
+    $this->initGFFImporterAttributes();
+
+    // The feature that declares Derives_from.
+    $derived_select = db_select('chado.feature', 'f');
+    $derived_select->fields('f');
+    $derived_select->condition('uniquename', $this->gene_2_uname);
+    $derived_select->condition('type_id', $this->gene_cvt);
+    $derived = $derived_select->execute()->fetchObject();
+
+    $relationship_term = chado_get_cvterm([
+      'name' => 'derives_from',
+      'cv_id' => [
+        'name' => 'sequence',
+      ],
+    ]);
+    $relationship_type_id = $relationship_term->cvterm_id;
+
+    // Find the feature on the object side of the relationship.
+    $origin_select = db_select('chado.feature', 'f');
+    $origin_select->join('chado.feature_relationship', 'fr', 'f.feature_id = fr.object_id');
+    $origin_select->fields('f');
+    $origin_select->condition('fr.subject_id', $derived->feature_id);
+    $origin_select->condition('fr.type_id', $relationship_type_id);
+    $origin = $origin_select->execute()->fetchObject();
+
+    $this->assertEquals($this->gene_1_uname, $origin->uniquename);
+    $this->assertEquals($this->gene_1_uname, $origin->name);
+    $this->assertEquals($this->gene_cvt, $origin->type_id);
+  }
 
   /**
    * Add a skip protein option.  Test that when checked, implicit proteins are
@@ -175,8 +506,10 @@ class GFF3ImporterTest extends TripalTestCase {
     //  });
   }
 
-  private function loadLandmarks($analysis, $organism) {
-    $landmark_file = ['file_remote' => 'https://raw.githubusercontent.com/statonlab/tripal_dev_seed/master/Fexcel_mini/sequences/empty_landmarks.fasta'];
+  private function loadLandmarks($analysis, $organism, $landmark_file = array()) {
+    if (empty($landmark_file)) {
+      $landmark_file = ['file_remote' => 'https://raw.githubusercontent.com/statonlab/tripal_dev_seed/master/Fexcel_mini/sequences/empty_landmarks.fasta'];
+    }
 
     $run_args = [
       'organism_id' => $organism->organism_id,

+ 141 - 150
tripal_chado/includes/TripalImporter/GFF3Importer.inc

@@ -145,16 +145,6 @@ class GFF3Importer extends TripalImporter {
        expression would be "$1-P$2".'),
     ];
 
-    $form['advanced']['use_transaction'] = [
-      '#type' => 'checkbox',
-      '#title' => t('Use a transaction'),
-      '#required' => FALSE,
-      '#description' => t('Use a database transaction when loading the GFF file.  If an error occurs
-      the entire datset loaded prior to the failure will be rolled back and will not be available
-      in the database.  If this option is unchecked and failure occurs all records up to the point
-      of failure will be present in the database.'),
-      '#default_value' => 1,
-    ];
     $form['advanced']['add_only'] = [
       '#type' => 'checkbox',
       '#title' => t('Import only new features'),
@@ -266,7 +256,6 @@ class GFF3Importer extends TripalImporter {
     $update = $form_state['values']['update'];
     $refresh = 0; //$form_state['values']['refresh'];
     $remove = 0; //$form_state['values']['remove'];
-    $use_transaction = $form_state['values']['use_transaction'];
     $line_number = trim($form_state['values']['line_number']);
     $landmark_type = trim($form_state['values']['landmark_type']);
     $alt_id_attr = trim($form_state['values']['alt_id_attr']);
@@ -319,7 +308,6 @@ class GFF3Importer extends TripalImporter {
     $update = $arguments['update'];
     $refresh = FALSE;
     $remove = FALSE;
-    $use_transaction = $arguments['use_transaction'];
     $target_organism_id = $arguments['target_organism_id'];
     $target_type = $arguments['target_type'];
     $create_target = $arguments['create_target'];
@@ -331,9 +319,13 @@ class GFF3Importer extends TripalImporter {
     $re_protein = $arguments['re_protein'];
     $skip_protein = $arguments['skip_protein'];
 
+    // An array that stores CVterms that have been looked up so we don't have
+    // to do the database query every time.
+    $this->cvterm_lookup = [];
+
 
     $this->loadGFF3($file_path, $organism_id, $analysis_id,
-      $add_only, $update, $refresh, $remove, $use_transaction,
+      $add_only, $update, $refresh, $remove,
       $target_organism_id, $target_type, $create_target,
       $start_line, $landmark_type, $alt_id_attr, $create_organism,
       $re_mrna, $re_protein, $skip_protein);
@@ -363,9 +355,6 @@ class GFF3Importer extends TripalImporter {
    * @param $remove
    *   Set to 1 to remove features present in the GFF file that exist in the
    *   database. Default is 0.
-   * @param $use_transaction
-   *   Set to 1 to use a transaction when loading the GFF. Any failure during
-   *   loading will result in the rollback of any changes. Default is 1.
    * @param $target_organism_id
    *   If the GFF file contains a 'Target' attribute then the feature and the
    *   target will have an alignment created, but to find the proper target
@@ -422,7 +411,7 @@ class GFF3Importer extends TripalImporter {
    * @ingroup gff3_loader
    */
   private function loadGFF3($gff_file, $organism_id, $analysis_id,
-                            $add_only = 0, $update = 1, $refresh = 0, $remove = 0, $use_transaction = 1,
+                            $add_only = 0, $update = 1, $refresh = 0, $remove = 0,
                             $target_organism_id = NULL, $target_type = NULL, $create_target = 0,
                             $start_line = 1, $landmark_type = '', $alt_id_attr = '', $create_organism = FALSE,
                             $re_mrna = '', $re_protein = '', $skip_protein = 0) {
@@ -430,10 +419,6 @@ class GFF3Importer extends TripalImporter {
     $ret = [];
     $date = getdate();
 
-    // An array that stores CVterms that have been looked up so we don't have
-    // to do the database query every time.
-    $cvterm_lookup = [];
-
     // An array that stores Landmarks that have been looked up so we don't have
     // to do the database query every time.
     $landmark_lookup = [];
@@ -471,10 +456,16 @@ class GFF3Importer extends TripalImporter {
     // get the controlled vocaubulary that we'll be using.  The
     // default is the 'sequence' ontology
     $sql = "SELECT * FROM {cv} WHERE name = :cvname";
+    $cv = chado_query($sql, [':cvname' => 'feature_property'])->fetchObject();
+    if (!$cv) {
+      throw new Exception(t("Cannot find the 'feature_property' ontology'", []));
+    }
+    $this->feature_property_cv_id = $cv->cv_id;
     $cv = chado_query($sql, [':cvname' => 'sequence'])->fetchObject();
     if (!$cv) {
       throw new Exception(t("Cannot find the 'sequence' ontology", []));
     }
+    $this->sequence_cv_id = $cv->cv_id;
     // get the organism for which this GFF3 file belongs
     $sql = "SELECT * FROM {organism} WHERE organism_id = :organism_id";
     $organism = chado_query($sql, [':organism_id' => $organism_id])->fetchObject();
@@ -483,26 +474,9 @@ class GFF3Importer extends TripalImporter {
     $line_num = 0;
     $num_read = 0;
 
-    // prepare the statement used to get the cvterm for each feature.
-    $sel_cvterm_sql = "
-      SELECT CVT.cvterm_id, CVT.cv_id, CVT.name, CVT.definition,
-        CVT.dbxref_id, CVT.is_obsolete, CVT.is_relationshiptype
-      FROM {cvterm} CVT
-        INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
-        LEFT JOIN {cvtermsynonym} CVTS on CVTS.cvterm_id = CVT.cvterm_id
-      WHERE CV.cv_id = :cv_id and
-       (lower(CVT.name) = lower(:name) or lower(CVTS.synonym) = lower(:synonym))
-   ";
-
     // If a landmark type was provided then pre-retrieve that.
     if ($landmark_type) {
-      $query = [
-        ':cv_id' => $cv->cv_id,
-        ':name' => $landmark_type,
-        ':synonym' => $landmark_type,
-      ];
-      $result = chado_query($sel_cvterm_sql, $query);
-      $landmark_cvterm = $result->fetchObject();
+      $landmark_cvterm = $this->getCvterm($landmark_type);
       if (!$landmark_cvterm) {
         throw new Exception(t('Cannot find landmark feature type \'%landmark_type\'.', ['%landmark_type' => $landmark_type]));
       }
@@ -511,6 +485,7 @@ class GFF3Importer extends TripalImporter {
     // iterate through each line of the GFF file
     while ($line = fgets($fh)) {
       $line_num++;
+      $this->line_num = $line_num;
       $size = drupal_strlen($line);
       $this->addItemsHandled($size);
       $num_read += $size;
@@ -596,21 +571,11 @@ class GFF3Importer extends TripalImporter {
           $phase = '';
         }
       }
-      if (array_key_exists($type, $cvterm_lookup)) {
-        $cvterm = $cvterm_lookup[$type];
-      }
-      else {
-        $result = chado_query($sel_cvterm_sql, [
-          ':cv_id' => $cv->cv_id,
-          ':name' => $type,
-          ':synonym' => $type,
-        ]);
-        $cvterm = $result->fetchObject();
-        $cvterm_lookup[$type] = $cvterm;
-        if (!$cvterm) {
-          throw new Exception(t('Cannot find feature term \'%type\' on line %line_num of the GFF file',
-            ['%type' => $type, '%line_num' => $line_num]));
-        }
+
+      $cvterm = $this->getCvterm($type);
+      if (!$cvterm) {
+        throw new Exception(t('Cannot find feature term \'%type\' on line %line_num of the GFF file',
+          ['%type' => $type, '%line_num' => $line_num]));
       }
 
       // break apart each of the attributes
@@ -678,7 +643,9 @@ class GFF3Importer extends TripalImporter {
             $org = chado_select_record('organism', ["*"], $values);
             if (count($org) == 0) {
               if ($create_organism) {
-                $feature_organism = (object) chado_insert_record('organism', $values);
+                $feature_organism = (object) chado_insert_record('organism', $values, array(
+                  'skip_validation' => TRUE,
+                ));
                 if (!$feature_organism) {
                   $this->logMessage("Could not add the organism, '%org', from line %line. Skipping this line.",
                     [
@@ -950,12 +917,7 @@ class GFF3Importer extends TripalImporter {
             TGPT.feature_id, TGPT.fmin, TGPT.fmax, TGCT.strand, FLM.uniquename
         ";
         $results = chado_query($sql);
-        $protein_cvterm = chado_get_cvterm([
-          'name' => 'polypeptide',
-          'cv_id' => [
-            'name' => 'sequence',
-          ],
-        ]);
+        $protein_cvterm = $this->getCvterm('polypeptide');
         while ($result = $results->fetchObject()) {
           // If a protein exists with this same parent then don't add a new
           // protein.
@@ -996,7 +958,7 @@ class GFF3Importer extends TripalImporter {
               $feature = $this->loadFeature($organism, $analysis_id,
                 $protein_cvterm, $uname, $name, '', 'f', 'f', 1, 0);
               // Add the derives_from relationship.
-              $cvterm = chado_get_cvterm(['cvterm_id' => $result->cvterm_id]);
+              $cvterm = $this->getCvterm($result->feature_type);
               $this->loadDerivesFrom($feature, $cvterm,
                 $result->uniquename, $organism, $pfmin, $pfmax);
               // Add the featureloc record. Set the start of the protein to
@@ -1012,26 +974,26 @@ class GFF3Importer extends TripalImporter {
 
       // Get features in a relationship that are also children of an alignment.
       $sql = "
-      SELECT DISTINCT F.feature_id, F.organism_id, F.type_id,
-        F.uniquename, FL.strand
-      FROM {tripal_gff_temp} TGT
-        INNER JOIN {feature} F                ON TGT.feature_id = F.feature_id
-        INNER JOIN {feature_relationship} FR  ON FR.object_id   = TGT.feature_id
-        INNER JOIN {cvterm} CVT               ON CVT.cvterm_id  = FR.type_id
-        INNER JOIN {featureloc} FL            ON FL.feature_id  = F.feature_id
-      WHERE CVT.name = 'part_of'
-    ";
+        SELECT DISTINCT F.feature_id, F.organism_id, F.type_id,
+          F.uniquename, FL.strand
+        FROM {tripal_gff_temp} TGT
+          INNER JOIN {feature} F                ON TGT.feature_id = F.feature_id
+          INNER JOIN {feature_relationship} FR  ON FR.object_id   = TGT.feature_id
+          INNER JOIN {cvterm} CVT               ON CVT.cvterm_id  = FR.type_id
+          INNER JOIN {featureloc} FL            ON FL.feature_id  = F.feature_id
+        WHERE CVT.name = 'part_of'
+        ";
       $parents = chado_query($sql);
 
       // Build and prepare the SQL for selecting the children relationship.
       $sel_gffchildren_sql = "
-      SELECT DISTINCT FR.feature_relationship_id, FL.fmin, FR.rank
-      FROM {feature_relationship} FR
-        INNER JOIN {featureloc} FL on FL.feature_id = FR.subject_id
-        INNER JOIN {cvterm} CVT on CVT.cvterm_id = FR.type_id
-      WHERE FR.object_id = :feature_id AND CVT.name = 'part_of'
-      ORDER BY FL.fmin ASC
-    ";
+        SELECT DISTINCT FR.feature_relationship_id, FL.fmin, FR.rank
+        FROM {feature_relationship} FR
+          INNER JOIN {featureloc} FL on FL.feature_id = FR.subject_id
+          INNER JOIN {cvterm} CVT on CVT.cvterm_id = FR.type_id
+        WHERE FR.object_id = :feature_id AND CVT.name = 'part_of'
+        ORDER BY FL.fmin ASC
+        ";
 
       // Now set the rank of any parent/child relationships.  The order is based
       // on the fmin.  The start rank is 1.  This allows features with other
@@ -1079,10 +1041,48 @@ class GFF3Importer extends TripalImporter {
       }
     }
 
-
     return 1;
   }
 
+  /**
+   * Looks up a controlled vocabulary term by name or synonym.
+   *
+   * Results, including misses, are cached in $this->cvterm_lookup so a given
+   * term is only queried once, which helps a lot with performance. The cache
+   * key combines the vocabulary ID and the term name so that the same name
+   * looked up in two vocabularies does not return the wrong cached term.
+   *
+   * @param $type
+   *   The name or synonym of the term. Matching is case-insensitive.
+   * @param $cv_id
+   *   The cv_id of the vocabulary to search. Defaults to
+   *   $this->sequence_cv_id when not set.
+   *
+   * @return
+   *   The term as returned by chado_get_cvterm(), or NULL if no term matched.
+   *
+   * @ingroup gff3_loader
+   */
+  private function getCvterm($type, $cv_id = NULL) {
+    if (!isset($cv_id)) {
+      $cv_id = $this->sequence_cv_id;
+    }
+
+    // Key on vocabulary and name together; keying on the name alone would hand
+    // back a term cached for a different vocabulary.
+    $cache_key = $cv_id . ':' . $type;
+    if (array_key_exists($cache_key, $this->cvterm_lookup)) {
+      return $this->cvterm_lookup[$cache_key];
+    }
+
+    // The vocabulary ID is bound as a placeholder rather than interpolated
+    // into the SQL string.
+    $sel_cvterm_sql = "
+      SELECT CVT.cvterm_id
+      FROM {cvterm} CVT
+        LEFT JOIN {cvtermsynonym} CVTS on CVTS.cvterm_id = CVT.cvterm_id
+      WHERE CVT.cv_id = :cv_id and
+       (lower(CVT.name) = lower(:name) or lower(CVTS.synonym) = lower(:synonym))
+      ";
+    $record = chado_query($sel_cvterm_sql, [
+      ':cv_id' => $cv_id,
+      ':name' => $type,
+      ':synonym' => $type,
+    ])->fetchObject();
+
+    // fetchObject() gives FALSE rather than NULL when no row matched, so a
+    // truthiness check is needed here; '??' would let FALSE through.
+    $cvterm = NULL;
+    if ($record) {
+      // Normalise any falsy result from chado_get_cvterm() to NULL.
+      $cvterm = chado_get_cvterm(['cvterm_id' => $record->cvterm_id]) ?: NULL;
+    }
+
+    $this->cvterm_lookup[$cache_key] = $cvterm;
+    return $cvterm;
+  }
+
   /**
    * Load the derives from attribute for a gff3 feature
    *
@@ -1096,6 +1096,7 @@ class GFF3Importer extends TripalImporter {
                                    $organism, $fmin, $fmax) {
 
     $type = $cvterm->name;
+    $derivesfrom_term = $this->getCvterm('derives_from');
 
     // First look for the object feature in the temp table to get it's type.
     $values = [
@@ -1105,15 +1106,7 @@ class GFF3Importer extends TripalImporter {
     $result = chado_select_record('tripal_gff_temp', ['type_name'], $values);
     $type_id = NULL;
     if (count($result) > 0) {
-      $otype = chado_get_cvterm([
-        'name' => $result[0]->type_name,
-        'cv_id' => [
-          'name' => 'sequence',
-        ],
-      ]);
-      if ($otype) {
-        $type_id = $otype->cvterm_id;
-      }
+      $type_id = $this->getCvterm($result[0]->type_name)->cvterm_id ?? NULL;
     }
 
     // If the object wasn't in the temp table then look for it in the
@@ -1173,12 +1166,7 @@ class GFF3Importer extends TripalImporter {
     $values = [
       'object_id' => $ofeature[0]->feature_id,
       'subject_id' => $feature->feature_id,
-      'type_id' => [
-        'cv_id' => [
-          'name' => 'sequence',
-        ],
-        'name' => 'derives_from',
-      ],
+      'type_id' => $derivesfrom_term->cvterm_id,
       'rank' => 0,
     ];
     $rel = chado_select_record('feature_relationship', ['*'], $values);
@@ -1187,7 +1175,9 @@ class GFF3Importer extends TripalImporter {
     }
 
     // finally insert the relationship if it doesn't exist
-    $ret = chado_insert_record('feature_relationship', $values);
+    $ret = chado_insert_record('feature_relationship', $values, array(
+      'skip_validation' => TRUE,
+    ));
     if (!$ret) {
       $this->logMessage("Could not add 'Derives_from' relationship for :uniquename and :subject.",
         [
@@ -1214,15 +1204,10 @@ class GFF3Importer extends TripalImporter {
     $uname = $feature->uniquename;
     $type = $cvterm->name;
     $rel_type = 'part_of';
-
-    // Prepare these SQL statements that will be used repeatedly.
-    $cvterm_sql = "
-    SELECT CVT.cvterm_id
-    FROM {cvterm} CVT
-      INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
-      LEFT JOIN {cvtermsynonym} CVTS on CVTS.cvterm_id = CVT.cvterm_id
-    WHERE cv.name = :cvname and (CVT.name = :name or CVTS.synonym = :synonym)
-  ";
+    $relcvterm = $this->getCvterm($rel_type);
+    if (!$relcvterm) {
+      throw new Exception(t("Cannot find the term, 'part_of', from the sequence ontology. This term is used for associating parent and children features. Please check that the ontology is fully imported."));
+    }
 
     // Iterate through the parents in the list.
     foreach ($parents as $parent) {
@@ -1239,19 +1224,7 @@ class GFF3Importer extends TripalImporter {
       $parent_type = $result[0]->type_name;
 
       // try to find the parent
-      $parentcvterm = chado_query($cvterm_sql, [
-        ':cvname' => 'sequence',
-        ':name' => $parent_type,
-        ':synonym' => $parent_type,
-      ])->fetchObject();
-      $relcvterm = chado_query($cvterm_sql, [
-        ':cvname' => 'sequence',
-        ':name' => $rel_type,
-        ':synonym' => $rel_type,
-      ])->fetchObject();
-      if (!$relcvterm) {
-        throw new Exception(t("Cannot find the term, 'part_of', from the sequence ontology. This term is used for associating parent and children features. Please check that the ontology is fully imported."));
-      }
+      $parentcvterm = $this->getCvterm($parent_type);
       $values = [
         'organism_id' => $organism_id,
         'uniquename' => $parent,
@@ -1280,7 +1253,9 @@ class GFF3Importer extends TripalImporter {
             'object_id' => $parent_feature->feature_id,
             'type_id' => $relcvterm->cvterm_id,
           ];
-          $result = chado_insert_record('feature_relationship', $values);
+          $result = chado_insert_record('feature_relationship', $values, array(
+            'skip_validation' => TRUE,
+          ));
           if (!$result) {
             $this->logMessage("Failed to insert feature relationship '$uname' ($type) $rel_type '$parent' ($parent_type).",
               [], TRIPAL_WARNING);
@@ -1300,7 +1275,9 @@ class GFF3Importer extends TripalImporter {
           if (isset($phase)) {
             $values['phase'] = $phase;
           }
-          $result = chado_insert_record('tripal_gffcds_temp', $values);
+          $result = chado_insert_record('tripal_gffcds_temp', $values, array(
+            'skip_validation' => TRUE,
+          ));
           if (!$result) {
             throw new Exception(t("Cound not save record in temporary CDS table, Cannot continue.", []));
             exit;
@@ -1347,7 +1324,9 @@ class GFF3Importer extends TripalImporter {
           'name' => $dbname,
           'description' => 'Added automatically by the GFF loader',
         ];
-        $success = chado_insert_record('db', $values);
+        $success = chado_insert_record('db', $values, array(
+          'skip_validation' => TRUE,
+        ));
         if ($success) {
           $values = ['name' => "$dbname"];
           $db = chado_select_record('db', ['db_id'], $values);
@@ -1373,7 +1352,9 @@ class GFF3Importer extends TripalImporter {
           'accession' => $accession,
           'version' => '',
         ];
-        $ret = chado_insert_record('dbxref', $values);
+        $ret = chado_insert_record('dbxref', $values, array(
+          'skip_validation' => TRUE,
+        ));
         $values = [
           'accession' => $accession,
           'db_id' => $db->db_id,
@@ -1396,7 +1377,9 @@ class GFF3Importer extends TripalImporter {
           'dbxref_id' => $dbxref->dbxref_id,
           'feature_id' => $feature->feature_id,
         ];
-        $success = chado_insert_record('feature_dbxref', $values);
+        $success = chado_insert_record('feature_dbxref', $values, array(
+          'skip_validation' => TRUE,
+        ));
         if (!$success) {
           $this->logMessage("Failed to insert Dbxref: $dbname:$accession.", [], TRIPAL_WARNING);
           return 0;
@@ -1480,7 +1463,9 @@ class GFF3Importer extends TripalImporter {
             'uniquename' => 'null',
           ],
         ];
-        $success = chado_insert_record('feature_cvterm', $values);
+        $success = chado_insert_record('feature_cvterm', $values, array(
+          'skip_validation' => TRUE,
+        ));
 
         if (!$success) {
           $this->logMessage("Failed to insert ontology term: $dbname:$accession.", [], TRIPAL_WARNING);
@@ -1510,7 +1495,9 @@ class GFF3Importer extends TripalImporter {
         'name' => 'synonym_type',
         'definition' => 'vocabulary for synonym types',
       ];
-      $success = chado_insert_record('cv', $values);
+      $success = chado_insert_record('cv', $values, array(
+        'skip_validation' => TRUE,
+      ));
       if (!$success) {
         $this->logMessage("Failed to add the synonyms type vocabulary.", [], TRIPAL_WARNING);
         return 0;
@@ -1568,7 +1555,9 @@ class GFF3Importer extends TripalImporter {
           'type_id' => $syntype->cvterm_id,
           'synonym_sgml' => '',
         ];
-        $success = chado_insert_record('synonym', $values);
+        $success = chado_insert_record('synonym', $values, array(
+          'skip_validation' => TRUE,
+        ));
         if (!$success) {
           $this->logMessage("Cannot add alias $alias to synonym table.", [], TRIPAL_WARNING);
           return 0;
@@ -1631,7 +1620,9 @@ class GFF3Importer extends TripalImporter {
           'feature_id' => $feature->feature_id,
           'pub_id' => $pub->pub_id,
         ];
-        $success = chado_insert_record('feature_synonym', $values);
+        $success = chado_insert_record('feature_synonym', $values, array(
+          'skip_validation' => TRUE,
+        ));
 
         if (!$success) {
           $this->logMessage("Cannot add alias $alias to feature synonym table.", [], TRIPAL_WARNING);
@@ -1705,7 +1696,9 @@ class GFF3Importer extends TripalImporter {
         'is_analysis' => $is_analysis,
         'is_obsolete' => $is_obsolete,
       ];
-      $feature = (object) chado_insert_record('feature', $values);
+      $feature = (object) chado_insert_record('feature', $values, array(
+        'skip_validation' => TRUE,
+      ));
       if (!$feature) {
         $this->logMessage("Failed to insert feature '$uniquename' ($cvterm->name).", [], TRIPAL_WARNING);
         return 0;
@@ -1747,7 +1740,7 @@ class GFF3Importer extends TripalImporter {
       if (strcmp($score, '.') != 0) {
         $af_values['significance'] = $score;
       }
-      if (!chado_insert_record('analysisfeature', $af_values)) {
+      if (!chado_insert_record('analysisfeature', $af_values, array('skip_validation' => TRUE))) {
         $this->logMessage("Could not add analysisfeature record: $analysis_id, $feature->feature_id.", [], TRIPAL_WARNING);
       }
     }
@@ -1839,7 +1832,9 @@ class GFF3Importer extends TripalImporter {
               'uniquename' => $landmark,
               'type_id' => $landmark_type_id,
             ];
-            $results = chado_insert_record('feature', $values);
+            $results = chado_insert_record('feature', $values, array(
+              'skip_validation' => TRUE,
+            ));
             if (!$results) {
               $this->logMessage("Cannot find landmark feature: '%landmark', nor could it be inserted.",
                 ['%landmark' => $landmark], TRIPAL_WARNING);
@@ -1930,13 +1925,13 @@ class GFF3Importer extends TripalImporter {
       if (strcmp($is_fmin_partial, 'f') == 0 or !$is_fmin_partial) {
         $is_fmin_partial = 'FALSE';
       }
-      elseif (strcmp($is_fmin_partial, 't') == 0 or $is_fmin_partial = 1) {
+      elseif (strcmp($is_fmin_partial, 't') == 0 or $is_fmin_partial == 1) {
         $is_fmin_partial = 'TRUE';
       }
       if (strcmp($is_fmax_partial, 'f') == 0 or !$is_fmax_partial) {
         $is_fmax_partial = 'FALSE';
       }
-      elseif (strcmp($is_fmax_partial, 't') == 0 or $is_fmax_partial = 1) {
+      elseif (strcmp($is_fmax_partial, 't') == 0 or $is_fmax_partial == 1) {
         $is_fmax_partial = 'TRUE';
       }
       $values = [
@@ -1954,7 +1949,9 @@ class GFF3Importer extends TripalImporter {
       if ($phase) {
         $values['phase'] = $phase;
       }
-      $success = chado_insert_record('featureloc', $values);
+      $success = chado_insert_record('featureloc', $values, array(
+        'skip_validation' => TRUE,
+      ));
       if (!$success) {
         throw new Exception("Failed to insert featureloc.");
       }
@@ -1974,17 +1971,11 @@ class GFF3Importer extends TripalImporter {
   private function loadProperty($feature, $property, $value) {
 
     // First make sure the cvterm exists. If not, then add it.
-    $select = [
-      'name' => $property,
-      'cv_id' => [
-        'name' => 'feature_property',
-      ],
-    ];
-    $result = chado_select_record('cvterm', ['*'], $select);
+    $result = $this->getCvterm($property, $this->feature_property_cv_id);
 
     // If we don't have a property like this already, then add it; otherwise,
     // reuse the existing term.
-    if (count($result) == 0) {
+    if (empty($result)) {
       $term = [
         'id' => "local:$property",
         'name' => $property,
@@ -2000,7 +1991,7 @@ class GFF3Importer extends TripalImporter {
       }
     }
     else {
-      $cvterm = $result[0];
+      $cvterm = $result;
     }
 
 
@@ -2147,7 +2138,7 @@ class GFF3Importer extends TripalImporter {
       $start = $matches[2];
       $end = $matches[3];
       // if we have an optional strand, convert it to a numeric value.
-      if ($matches[4]) {
+      if (!empty($matches[4])) {
         if (preg_match('/^\+$/', trim($matches[4]))) {
           $target_strand = 1;
         }
@@ -2233,12 +2224,12 @@ class GFF3Importer extends TripalImporter {
         else {
           // check to see if this is a synonym
           $sql = "
-          SELECT CVTS.cvterm_id
-          FROM {cvtermsynonym} CVTS
-            INNER JOIN {cvterm} CVT ON CVT.cvterm_id = CVTS.cvterm_id
-            INNER JOIN {cv} CV      ON CV.cv_id = CVT.cv_id
-          WHERE CV.name = 'sequence' and CVTS.synonym = :synonym
-        ";
+            SELECT CVTS.cvterm_id
+            FROM {cvtermsynonym} CVTS
+              INNER JOIN {cvterm} CVT ON CVT.cvterm_id = CVTS.cvterm_id
+              INNER JOIN {cv} CV      ON CV.cv_id = CVT.cv_id
+            WHERE CV.name = 'sequence' and CVTS.synonym = :synonym
+            ";
           $synonym = chado_query($sql, [':synonym' => $gff_target_type])->fetchObject();
           if ($synonym) {
             $t_type_id = $synonym->cvterm_id;
@@ -2253,8 +2244,8 @@ class GFF3Importer extends TripalImporter {
 
       // we want to add a featureloc record that uses the target feature as the srcfeature (landmark)
       // and the landmark as the feature.
-      $this->loadFeatureLoc($feature, $organism, $target_feature, $target_fmin,
-        $target_fmax, $target_strand, $phase, $attr_fmin_partial, $attr_fmax_partial, $attr_residue_info,
+      $this->loadFeatureLoc($feature, NULL, $target_feature, $target_fmin,
+        $target_fmax, $target_strand, NULL, NULL, NULL, NULL,
         $attr_locgroup, $t_type_id, $t_organism_id, $create_target, TRUE);
     }
     // the target attribute is not correctly formatted