Browse Source

Merge branch '1040-tv3-gff3_performance' of github.com:tripal/tripal into 1040-tv3-gff3_performance

Stephen Ficklin 4 years ago
parent
commit
9807839cf5

+ 16 - 0
tests/tripal_chado/data/small_gene.gff

@@ -1,5 +1,21 @@
 ##gff-version 3
 ##gff-version 3
+scaffold1	scaffold1	scaffold	1	1000	.	.	.	ID=scaffold1
 scaffold1	test_gene_001	gene	100	200	.	+	.	ID=test_gene_001;Name=test_gene_001;biotype=protein_coding;Alias=first_test_gene;Dbxref=TEST_DB:test_gene_dbx_001;Ontology_term=SO:0000704;Target=scaffold1 100 200;target_type=supercontig;Gap=test_gap_1;Gap=test_gap_2;Note=test_gene_001_note
 scaffold1	test_gene_001	gene	100	200	.	+	.	ID=test_gene_001;Name=test_gene_001;biotype=protein_coding;Alias=first_test_gene;Dbxref=TEST_DB:test_gene_dbx_001;Ontology_term=SO:0000704;Target=scaffold1 100 200;target_type=supercontig;Gap=test_gap_1;Gap=test_gap_2;Note=test_gene_001_note
 scaffold1	test_mrna_001	mRNA	100	150	.	+	.	ID=test_mrna_001.1;Parent=test_gene_001;Name=test_mrna_001;biotype=protein_coding;AED=0.05
 scaffold1	test_mrna_001	mRNA	100	150	.	+	.	ID=test_mrna_001.1;Parent=test_gene_001;Name=test_mrna_001;biotype=protein_coding;AED=0.05
 scaffold1	test_protein_001	polypeptide	100	150	.	+	.	ID=test_protein_001.1;Parent=test_mrna_001.1
 scaffold1	test_protein_001	polypeptide	100	150	.	+	.	ID=test_protein_001.1;Parent=test_mrna_001.1
 scaffold1	test_gene_002	gene	300	400	.	+	.	ID=test_gene_002;Name=test_gene_002;biotype=protein_coding;Derives_from=test_gene_001
 scaffold1	test_gene_002	gene	300	400	.	+	.	ID=test_gene_002;Name=test_gene_002;biotype=protein_coding;Derives_from=test_gene_001
+##FASTA
+>scaffold1
+TAGTGTCTTTTTATTGGTTAATGAGTTTCTTTTTTTATAAACAATATTTTGATTTAAAAAAGAACGTAGGACTTAAATGC
+AATTTTTTAAATCTACAATTGAGGAGATTTTATGCACAATATTATATTAATTGCAAAAAAAACCCACTTACAAGAATCAC
+CCATATTGTTAATGAAATAATTCCATATTATTGGTTTTCAAATTTTATCCCTCCTACGTGTCAAAATAGTGTCTTTTTAT
+TGGTTAATGATTTTTTTTGTATAAACAATATCTTTCATTAAAAAAACGTGCGACTGAAATGCAATTATGTTCCACCACAT
+ATATCACATATTATTATTAATTTCAAAATAACCCCACTTACCATATTCAGCCATATTATTAATGAATTAATTTCACATTA
+TTGATTTTTAAATTTTATCTCTCCTGCATGTCAAAATGACGTCTTTTTATTGGCTATTTAGTTTAGCTTTTTAATCAATT
+GCTTCATATTAAAAACGTAAATATATAAATGTACATTCCACTTAATTTGGGTGCCCAATATTATATTAATTGTCAAAAAA
+ACCCAATTTTCAAATTTCTTCTATCCTATATGACAAAATAATGTCTTTTTATTGGTTAATGAGTTTTTTTTTTATAAACA
+ATATATTGATTGAAAAAGTGTAGGACGTAAATGCATTTTTTTTTTAATTTTACAATTGATTAAATTTTATGCAATATATT
+ATACTAATTCCCAAAAAACCCACTTACAAGAATCACCCATATTGTTAATGAAATAATTCAACATTATTGGTTTTCAAATT
+TTATCTCTCCTACGTGTCAAAATAGTATCTTTTTATTGGTTAAACAGTTTTATTTTTTTTATAAACAATATTTTCATTAA
+AAAAAACGTGGGACTGAAATGCAATTATGTTCCACCATATATATCATATATTATTATTTTTAAAGTAATTTAGATAAATT
+AGGTAATTACATCAATTTAAATTAATTACTTATTTGTTAC

+ 26 - 0
tests/tripal_chado/loaders/GFF3ImporterTest.php

@@ -93,8 +93,15 @@ class GFF3ImporterTest extends TripalTestCase {
         'name' => 'sequence',
         'name' => 'sequence',
       ),
       ),
     ))->cvterm_id;
     ))->cvterm_id;
+    $this->supercontig_cvt = chado_get_cvterm(array(
+      'name' => 'supercontig',
+      'cv_id' => array(
+        'name' => 'sequence',
+      ),
+    ))->cvterm_id;
     $this->gene_1_uname = 'test_gene_001';
     $this->gene_1_uname = 'test_gene_001';
     $this->gene_2_uname = 'test_gene_002';
     $this->gene_2_uname = 'test_gene_002';
+    $this->scaffold_1_uname = 'scaffold1';
   }
   }
 
 
   /**
   /**
@@ -380,6 +387,25 @@ class GFF3ImporterTest extends TripalTestCase {
     $this->assertEquals($this->gene_cvt, $derivesfrom_feature->type_id);
     $this->assertEquals($this->gene_cvt, $derivesfrom_feature->type_id);
   }
   }
 
 
+  /**
+   * Ensure FASTA information loaded correctly into chado.
+   *
+   * Runs the shared importer fixture, then checks that the scaffold1 feature
+   * received the residues from the ##FASTA section of small_gene.gff along
+   * with the matching sequence length and MD5 checksum.
+   *
+   * @group gff
+   */
+  public function testGFFImporterAttributeFastas() {
+    $this->initGFFImporterAttributes();
+
+    $scaffold = db_select('chado.feature', 'f')
+      ->fields('f')
+      ->condition('uniquename', $this->scaffold_1_uname)
+      ->condition('type_id', $this->supercontig_cvt)
+      ->execute()->fetchObject();
+
+    // fetchObject() yields FALSE when no row matches. Fail here with a clear
+    // message instead of dereferencing a non-object in the assertions below.
+    $this->assertNotEmpty($scaffold, 'The scaffold1 feature was not found in chado.feature after the GFF3 import.');
+
+    $this->assertEquals(1000, $scaffold->seqlen);
+    $this->assertEquals(1000, strlen($scaffold->residues));
+    $this->assertEquals('0154424abe69dd64cd428c330d480ba0', $scaffold->md5checksum);
+  }
+
   /**
   /**
    * Add a skip protein option.  Test that when checked, implicit proteins are
    * Add a skip protein option.  Test that when checked, implicit proteins are
    * not created, but that they are created when unchecked.
    * not created, but that they are created when unchecked.

+ 375 - 18
tripal_chado/includes/TripalImporter/GFF3Importer.inc

@@ -187,6 +187,16 @@ class GFF3Importer extends TripalImporter {
    */
    */
   private $create_organism = FALSE;
   private $create_organism = FALSE;
 
 
+  /**
+   * Holds mapping of DB names to DB ids.
+   */
+  private $db_lookup = [];
+
+  /**
+   * Holds a mapping of Dbxref names to ids.
+   */
+  private $dbxref_lookup = [];
+
   /**
   /**
    * An array that stores CVterms that have been looked up so we don't have
    * An array that stores CVterms that have been looked up so we don't have
    * to do the database query every time.
    * to do the database query every time.
@@ -624,15 +634,23 @@ class GFF3Importer extends TripalImporter {
     $this->logMessage("Step 4: Loading feature locations...                  ");
     $this->logMessage("Step 4: Loading feature locations...                  ");
     $this->loadFeatureLocs();
     $this->loadFeatureLocs();
 
 
-    $this->logMessage("Step 5: Loading features properties...                ");
+    $this->logMessage("Step 5: Loading features Derives_from (gene/CDS relationships)...");
+    $this->loadDerivesFroms();
+
+    $this->logMessage("Step 6: Loading features properties...                ");
     $this->loadProperties();
     $this->loadProperties();
 
 
-    $this->logMessage("Step 6: Loading features synonyms (aliases)...        ");
+    $this->logMessage("Step 7: Loading features synonyms (aliases)...        ");
     $this->loadAliases();
     $this->loadAliases();
 
 
-    $this->logMessage("Step 7: Loading features cross references...          ");
+    $this->logMessage("Step 8: Loading features cross references...          ");
     $this->loadDbxrefs();
     $this->loadDbxrefs();
 
 
+    if (!empty($this->residue_index)) {
+      $this->logMessage("Step 9: Loading residues from FASTA...                ");
+      $this->loadFastas();
+    }
+
   }
   }
 
 
   /**
   /**
@@ -755,6 +773,59 @@ class GFF3Importer extends TripalImporter {
     $this->null_pub = $pub;
     $this->null_pub = $pub;
   }
   }
 
 
+  /**
+   * Makes sure Chado is ready with the necessary DB and Dbxref records.
+   *
+   * Every database name collected in $this->db_lookup is resolved to its
+   * db_id, and every entry collected in $this->dbxref_lookup (an array with
+   * 'db' and 'accession' keys) is resolved to its dbxref_id. Records that do
+   * not exist yet are inserted. After this call both lookup arrays hold ids
+   * instead of their placeholder values.
+   *
+   * @throws Exception
+   *   If a missing db or dbxref record cannot be inserted.
+   */
+  private function prepDbxrefs() {
+    $sql = "
+      SELECT db_id
+      FROM {db}
+      WHERE name = :dbname";
+
+    foreach ($this->db_lookup as $dbname => $value) {
+      // The query already returns the db_id, so the record is not fetched a
+      // second time. fetchObject() yields FALSE when there is no match.
+      $db = chado_query($sql, array(
+        ':dbname' => $dbname,
+      ))->fetchObject();
+
+      if (!$db) {
+        $db = chado_insert_db(array(
+          'name' => $dbname,
+        ));
+        if (!$db) {
+          throw new Exception(t('Could not add the database "%dbname" needed by a Dbxref in the GFF3 file.',
+            ['%dbname' => $dbname]));
+        }
+      }
+
+      $this->db_lookup[$dbname] = $db->db_id;
+    }
+
+    $sql = "
+      SELECT dbxref_id
+      FROM {dbxref}
+      WHERE db_id = :db_id and accession = :accession";
+
+    foreach ($this->dbxref_lookup as $index => $info) {
+      $db_id = $this->db_lookup[$info['db']];
+
+      $dbx = chado_query($sql, array(
+        ':db_id' => $db_id,
+        ':accession' => $info['accession'],
+      ))->fetchObject();
+
+      if (!$dbx) {
+        $dbx = chado_insert_dbxref(array(
+          'db_id' => $db_id,
+          'accession' => $info['accession'],
+        ));
+        if (!$dbx) {
+          throw new Exception(t('Could not add the Dbxref "%dbxref" from the GFF3 file.',
+            ['%dbxref' => $index]));
+        }
+      }
+
+      $this->dbxref_lookup[$index] = $dbx->dbxref_id;
+    }
+  }
+
   /**
   /**
    * Parses the current line of the GFF3 file for a feature.
    * Parses the current line of the GFF3 file for a feature.
    *
    *
@@ -824,6 +895,8 @@ class GFF3Importer extends TripalImporter {
     $attr_parent = '';
     $attr_parent = '';
     $attr_others = [];
     $attr_others = [];
     $attr_aliases = [];
     $attr_aliases = [];
+    $attr_dbxref = [];
+    $attr_derives = [];
     foreach ($attrs as $attr) {
     foreach ($attrs as $attr) {
       $attr = rtrim($attr);
       $attr = rtrim($attr);
       $attr = ltrim($attr);
       $attr = ltrim($attr);
@@ -859,6 +932,12 @@ class GFF3Importer extends TripalImporter {
       elseif (strcmp($tag_name, 'Parent') == 0) {
       elseif (strcmp($tag_name, 'Parent') == 0) {
         $attr_parent = urldecode($tag[1]);
         $attr_parent = urldecode($tag[1]);
       }
       }
+      elseif (strcmp($tag_name, 'Dbxref') == 0) {
+        $attr_dbxref = array_merge($attr_dbxref, $tags[$tag_name]);
+      }
+      elseif (strcmp($tag_name, 'Derives_from') == 0) {
+        $attr_derives = array_merge($attr_derives, $tags[$tag_name]);
+      }
       // Get the list of non-reserved attributes.
       // Get the list of non-reserved attributes.
       elseif (strcmp($tag_name, 'Name') !=0 and strcmp($tag_name, 'ID') !=0 and
       elseif (strcmp($tag_name, 'Name') !=0 and strcmp($tag_name, 'ID') !=0 and
               strcmp($tag_name, 'Alias') != 0 and strcmp($tag_name, 'Parent') != 0 and
               strcmp($tag_name, 'Alias') != 0 and strcmp($tag_name, 'Parent') != 0 and
@@ -883,6 +962,31 @@ class GFF3Importer extends TripalImporter {
     $ret['name'] = $attr_name;
     $ret['name'] = $attr_name;
     $ret['uniquename'] = $attr_uniquename;
     $ret['uniquename'] = $attr_uniquename;
     $ret['synonyms'] = $attr_aliases;
     $ret['synonyms'] = $attr_aliases;
+    $ret['dbxrefs'] = [];
+    foreach ($attr_dbxref as $key => $dbx) {
+      $parts = explode(':', $dbx);
+      if (count($parts) != 2) {
+        throw new Exception(t('Dbxrefs must be of the format: "Dbxref=<db name>:<accession>". The Dbxref %dbx on line %line_num does not satisfy this format.',
+            ['%line_num' => $this->current_line, '%dbx' => $dbx]));
+      }
+      $ret['dbxrefs']["{$parts[0]}:{$parts[1]}"] = array(
+        'db' => $parts[0],
+        'accession' => $parts[1],
+      );
+    }
+    $ret['dbxrefs']["GFF_source:{$ret['source']}"] = array(
+      'db' => 'GFF_source',
+      'accession' => $ret['source'],
+    );
+
+    $ret['derives_from'] = $attr_derives;
+    if (count($ret['derives_from']) > 1) {
+      throw new Exception(t('Each feature can only have one "Derives_from" attribute. The feature %uniquename has more than one: %derives',
+        [
+          '%uniquename' => $ret['uniquename'],
+          '%derives' => $ret['derives_from'],
+        ]));
+    }
 
 
     // Now add all of the attributes into the return array.
     // Now add all of the attributes into the return array.
     foreach ($tags as $key => $value) {
     foreach ($tags as $key => $value) {
@@ -912,8 +1016,72 @@ class GFF3Importer extends TripalImporter {
       // Get the ID and the current file pointer and store that for later.
       // Get the ID and the current file pointer and store that for later.
       if (preg_match('/^>/', $line)) {
       if (preg_match('/^>/', $line)) {
         $id = preg_replace('/^>([^\s]+).*$/', '\1', $line);
         $id = preg_replace('/^>([^\s]+).*$/', '\1', $line);
-        $this->residue_index[$id] = ftell($this->gff_file_h);
+        $this->residue_index[trim($id)] = ftell($this->gff_file_h);
+      }
+    }
+  }
+
+  /**
+   * Loads the actual residue information from the FASTA section of the file.
+   */
+  private function loadFastas() {
+
+    $num_residues = count(array_keys($this->residue_index));
+
+    $this->setItemsHandled(0);
+    $this->setTotalItems($num_residues);
+
+    $count = 0;
+
+    foreach ($this->residue_index as $uniquename => $offset) {
+      $is_landmark = FALSE;
+      if (!(array_key_exists($uniquename, $this->features) and $this->features[$uniquename]) and !(array_key_exists($uniquename, $this->landmarks) and $this->landmarks[$uniquename])) {
+        $this->logMessage('Cannot find feature to assign FASTA sequence: %uname.',
+          ['%uname' => $uniquename], TRIPAL_WARNING);
+        $count++;
+        continue;
+      }
+
+      if (array_key_exists($uniquename, $this->features)) {
+        $feature = $this->features[$uniquename];
+      }
+      else {
+        $feature = $this->landmarks[$uniquename];
+        $is_landmark = TRUE;
+      }
+
+      $this->ensureFeatureIsLoaded($feature);
+      $id = $feature['feature_id'];
+
+      $residues = [];
+      fseek($this->gff_file_h, $offset);
+      while ($line = fgets($this->gff_file_h)) {
+        if (preg_match('/^>/', $line)) {
+          break;
+        }
+        $residues[] = trim($line);
+      }
+
+      $residues = implode('', $residues);
+      $feature['residues'] = $residues;
+
+      if (!$is_landmark) {
+        $this->features[$uniquename] = $feature;
+      }
+      else {
+        $this->landmarks[$uniquename] = $feature;
       }
       }
+
+      chado_update_record('feature', array(
+        'feature_id' => $id,
+      ), array(
+        'residues' => $residues,
+        'seqlen' => strlen($residues),
+        'md5checksum' => md5($residues),
+      ));
+
+      $count++;
+      $this->setItemsHandled($count);
     }
     }
   }
   }
 
 
@@ -1028,6 +1196,17 @@ class GFF3Importer extends TripalImporter {
         $this->relationships['Child'][$gff_feature['uniquename']] = $gff_feature['Parent'];
         $this->relationships['Child'][$gff_feature['uniquename']] = $gff_feature['Parent'];
       }
       }
 
 
+      // Organize DBs for faster access later on.
+      foreach ($gff_feature['dbxrefs'] as $index => $info) {
+        if (!array_key_exists($info['db'], $this->db_lookup)) {
+          $this->db_lookup[$info['db']] = FALSE;
+        }
+
+        if (!array_key_exists($index, $this->dbxref_lookup)) {
+          $this->dbxref_lookup[$index] = $info;
+        }
+      }
+
       // Organize the CVterms for faster access later on.
       // Organize the CVterms for faster access later on.
       if (!array_key_exists($gff_feature['type'], $feature_cvterms)) {
       if (!array_key_exists($gff_feature['type'], $feature_cvterms)) {
         $feature_cvterms[$gff_feature['type']] = 0;
         $feature_cvterms[$gff_feature['type']] = 0;
@@ -1044,6 +1223,8 @@ class GFF3Importer extends TripalImporter {
       }
       }
     }
     }
 
 
+    $this->prepDbxrefs();
+
     // Iterate through the feature type terms and get a chado object for each.
     // Iterate through the feature type terms and get a chado object for each.
     $feature_cvterm_ids = [];
     $feature_cvterm_ids = [];
     foreach ($feature_cvterms as $name => $counts) {
     foreach ($feature_cvterms as $name => $counts) {
@@ -1179,7 +1360,9 @@ class GFF3Importer extends TripalImporter {
     $batch_num = 1;
     $batch_num = 1;
     $sql = '';
     $sql = '';
     $args = [];
     $args = [];
+    $batch_features = [];
     foreach ($features as $uniquename => $feature) {
     foreach ($features as $uniquename => $feature) {
+      $batch_features[$uniquename] = $feature;
 
 
       // Only do an insert if this feature doesn't already exist in the database.
       // Only do an insert if this feature doesn't already exist in the database.
       if (!$this->doesFeatureAlreadyExist($feature)) {
       if (!$this->doesFeatureAlreadyExist($feature)) {
@@ -1203,7 +1386,7 @@ class GFF3Importer extends TripalImporter {
           $sql = rtrim($sql, ",\n");
           $sql = rtrim($sql, ",\n");
           $sql = $init_sql . $sql;
           $sql = $init_sql . $sql;
           $last_id = chado_query($sql, $args, ['return' => Database::RETURN_INSERT_ID]);
           $last_id = chado_query($sql, $args, ['return' => Database::RETURN_INSERT_ID]);
-          $this->assignFeatureIDs($start_id, $last_id);
+          $this->assignFeatureIDs($batch_features);
           $this->setItemsHandled($batch_num);
           $this->setItemsHandled($batch_num);
           $batch_num++;
           $batch_num++;
 
 
@@ -1212,6 +1395,7 @@ class GFF3Importer extends TripalImporter {
           $i = 0;
           $i = 0;
           $args = [];
           $args = [];
           $start_id = $last_id;
           $start_id = $last_id;
+          $batch_features = [];
         }
         }
       }
       }
     }
     }
@@ -1221,9 +1405,12 @@ class GFF3Importer extends TripalImporter {
       $sql = rtrim($sql, ",\n");
       $sql = rtrim($sql, ",\n");
       $sql = $init_sql . $sql;
       $sql = $init_sql . $sql;
       $last_id = chado_query($sql, $args, ['return' => Database::RETURN_INSERT_ID]);
       $last_id = chado_query($sql, $args, ['return' => Database::RETURN_INSERT_ID]);
-      $this->assignFeatureIDs($start_id, $last_id);
       $this->setItemsHandled($batch_num);
       $this->setItemsHandled($batch_num);
     }
     }
+
+    if (!empty($batch_features)) {
+      $this->assignFeatureIDs($batch_features);
+    }
   }
   }
 
 
   /**
   /**
@@ -1232,20 +1419,25 @@ class GFF3Importer extends TripalImporter {
    * The start and last IDs should corresopnd to Id's surrounding
    * The start and last IDs should corresopnd to Id's surrounding
    * a batch insert of features.
    * a batch insert of features.
    */
    */
-  private function assignFeatureIDs($start_id, $last_id) {
+  private function assignFeatureIDs($batch_features) {
     // Get the feature Ids for the batch sequences
     // Get the feature Ids for the batch sequences
     $sql = "
     $sql = "
-      SELECT feature_id, uniquename
+      SELECT feature_id
       FROM {feature} F
       FROM {feature} F
-      WHERE feature_id > $start_id and feature_id <= $last_id
+      WHERE uniquename = :uniquename and organism_id = :organism_id and type_id = :type_id
     ";
     ";
-    $results = chado_query($sql);
-    while ($result = $results->fetchObject()) {
-      if (array_key_exists($result->uniquename, $this->features)) {
-        $this->features[$result->uniquename]['feature_id'] = $result->feature_id;
+
+    foreach ($batch_features as $uniquename => $feature) {
+      $result = chado_query($sql, array(
+        ':uniquename' => $feature['uniquename'],
+        ':organism_id' => $feature['organism_id'],
+        ':type_id' => $this->feature_cvterm_lookup[$feature['type']],
+      ))->fetchObject();
+      if (array_key_exists($uniquename, $this->features)) {
+        $this->features[$uniquename]['feature_id'] = $result->feature_id;
       }
       }
-      if (array_key_exists($result->uniquename, $this->landmarks)) {
-        $this->landmarks[$result->uniquename]['feature_id'] = $result->feature_id;
+      if (array_key_exists($uniquename, $this->landmarks)) {
+        $this->landmarks[$uniquename]['feature_id'] = $result->feature_id;
       }
       }
     }
     }
   }
   }
@@ -1321,7 +1513,172 @@ class GFF3Importer extends TripalImporter {
    *
    *
    */
    */
   private function loadDbxrefs() {
   private function loadDbxrefs() {
+    $batch_size = 1000;
+    $num_features = count(array_keys($this->features));
+    $num_batches = (int) ($num_features / $batch_size) + 1;
+
+    $this->setItemsHandled(0);
+    $this->setTotalItems($num_batches);
+
+    // Don't need to use placeholders for this insert since we are only using
+    // integers.
+    $count = 0;
+    $batch_num = 0;
+    $batch_pairs = [];
+    $init_fdbx_sql = "INSERT INTO {feature_dbxref} (feature_id, dbxref_id) VALUES \n";
+    $check_fdbx_sql = "SELECT feature_dbxref_id FROM {feature_dbxref} WHERE feature_id = :feature_id and dbxref_id = :dbxref_id";
+    foreach ($this->features as $uniquename => $feature) {
+      $count++;
+
+      $this->ensureFeatureIsLoaded($feature);
+
+      foreach ($feature['dbxrefs'] as $index => $info) {
+        $feature_id = $feature['feature_id'];
+        $dbx_id = $this->dbxref_lookup[$index];
+
+        // Check that this feature_dbxref is not already in the database.
+        $result = chado_query($check_fdbx_sql, array(
+          ':feature_id' => $feature_id,
+          ':dbxref_id' => $dbx_id,
+        ))->fetchObject() ?? NULL;
+
+        if (!$result) {
+          $batch_pairs[] = "($feature_id, $dbx_id)";
+        }
+      }
+
+      if ($count == $batch_size) {
+        $batch_num++;
+        if (count($batch_pairs) > 0) {
+          // Perform the actual insertion.
+          $fdbx_sql = $init_fdbx_sql . implode(', ', $batch_pairs);
+          $last_id = chado_query($fdbx_sql, array(), array('return' => Database::RETURN_INSERT_ID));
+        }
+
+        $this->setItemsHandled($batch_num);
+
+        $count = 0;
+        $batch_pairs = [];
+      }
+    }
 
 
+    if ($count > 0) {
+      $batch_num++;
+      if (count($batch_pairs) > 0) {
+        // Perform the actual insertion.
+        $fdbx_sql = $init_fdbx_sql . implode(', ', $batch_pairs);
+        $last_id = chado_query($fdbx_sql, array(), array('return' => Database::RETURN_INSERT_ID));
+      }
+
+      $this->setItemsHandled($batch_num);
+    }
+  }
+
+  /**
+   * Loads 'Derives_from' relationships into the feature_relationship table.
+   *
+   * Only features for which doesFeatureAlreadyExist() returns FALSE are
+   * handled. The object of each relationship is looked up first among the
+   * features parsed from this file, then among the landmarks, and finally in
+   * the database by organism and uniquename. Relationships are inserted in
+   * batches with the 'derives_from' term as type and a rank of 0.
+   */
+  private function loadDerivesFroms() {
+    $batch_size = 1000;
+    $num_features = count($this->features);
+    $num_batches = (int) ($num_features / $batch_size) + 1;
+
+    $this->setItemsHandled(0);
+    $this->setTotalItems($num_batches);
+
+    $init_sql = "
+      INSERT INTO {feature_relationship}
+        (subject_id, object_id, type_id, rank)
+      VALUES\n";
+
+    $num_seen = 0;
+    $rows = [];
+    $args = [];
+    $derives_id = $this->getCvtermID('derives_from');
+
+    foreach ($this->features as $uniquename => $subject) {
+      // Progress follows the number of features examined so that it always
+      // stays within the total reported above, whether or not a feature
+      // contributes a relationship.
+      $num_seen++;
+      if ($num_seen % $batch_size == 0) {
+        $this->setItemsHandled((int) ($num_seen / $batch_size));
+      }
+
+      if ($this->doesFeatureAlreadyExist($subject)) {
+        continue;
+      }
+
+      $this->ensureFeatureIsLoaded($subject);
+      if (empty($subject['derives_from'])) {
+        continue;
+      }
+
+      $object = $subject['derives_from'][0];
+      if (array_key_exists($object, $this->features)) {
+        $this->ensureFeatureIsLoaded($this->features[$object]);
+        $object_id = $this->features[$object]['feature_id'];
+      }
+      elseif (array_key_exists($object, $this->landmarks)) {
+        $this->ensureFeatureIsLoaded($this->landmarks[$object]);
+        $object_id = $this->landmarks[$object]['feature_id'];
+      }
+      else {
+        // Derives_from cannot be found in either features or landmarks, so
+        // we need to get the feature id from the database if it exists.
+        // chado_select_record() returns an array of row objects, so the id
+        // is read from the first (and only acceptable) element.
+        $matches = chado_select_record('feature', ['feature_id'], [
+          'organism_id' => $subject['organism_id'],
+          'uniquename' => $object,
+        ]);
+
+        if (!is_array($matches) or count($matches) == 0) {
+          $this->logMessage('Cannot find feature type for "%object" in derives_from relationship.',
+            ['%object' => $object], TRIPAL_WARNING);
+          continue;
+        }
+        if (count($matches) > 1) {
+          $this->logMessage('Cannot find feature type for "%object" in derives_from relationship. Multiple matching features exist with this uniquename.',
+            ['%object' => $object], TRIPAL_WARNING);
+          continue;
+        }
+
+        $object_id = $matches[0]->feature_id;
+      }
+
+      // Placeholders are numbered by position within the pending batch, so
+      // they are unique per statement.
+      $i = count($rows);
+      $rows[] = "(:subject_id_$i, :object_id_$i, $derives_id, 0)";
+      $args[":subject_id_$i"] = $subject['feature_id'];
+      $args[":object_id_$i"] = $object_id;
+
+      // Flush on the number of pending rows so a single INSERT can never
+      // grow beyond the batch size.
+      if (count($rows) >= $batch_size) {
+        $sql = $init_sql . implode(",\n", $rows);
+        chado_query($sql, $args, array('return' => Database::RETURN_INSERT_ID));
+        $rows = [];
+        $args = [];
+      }
+    }
+
+    // Insert whatever is left over from the last, partial batch.
+    if (count($rows) > 0) {
+      $sql = $init_sql . implode(",\n", $rows);
+      chado_query($sql, $args, array('return' => Database::RETURN_INSERT_ID));
+    }
+
+    $this->setItemsHandled($num_batches);
   }
   }
 
 
   /**
   /**
@@ -1367,7 +1724,7 @@ class GFF3Importer extends TripalImporter {
         $args[":srcfeature_id_$i"] = $this->landmarks[$feature['landmark']]['feature_id'];
         $args[":srcfeature_id_$i"] = $this->landmarks[$feature['landmark']]['feature_id'];
         $args[":feature_id_$i"] = $feature['feature_id'];
         $args[":feature_id_$i"] = $feature['feature_id'];
         $args[":fmin_$i"] = $feature['start'];
         $args[":fmin_$i"] = $feature['start'];
-        $args[":fmax_$i"] = $feature['end'];
+        $args[":fmax_$i"] = $feature['stop'];
         $args[":strand_$i"] = $feature['strand'];
         $args[":strand_$i"] = $feature['strand'];
         $args[":phase_$i"] = $feature['phase'] ? $feature['phase'] : NULL;
         $args[":phase_$i"] = $feature['phase'] ? $feature['phase'] : NULL;
         $args[":rank_$i"] = $rank;
         $args[":rank_$i"] = $rank;
@@ -1379,7 +1736,7 @@ class GFF3Importer extends TripalImporter {
           // Insert the batch.
           // Insert the batch.
           $sql = rtrim($sql, ",\n");
           $sql = rtrim($sql, ",\n");
           $sql = $init_sql . $sql;
           $sql = $init_sql . $sql;
-          //$last_id = chado_query($sql, $args, ['return' => Database::RETURN_INSERT_ID]);
+          $last_id = chado_query($sql, $args, ['return' => Database::RETURN_INSERT_ID]);
           $this->setItemsHandled($batch_num);
           $this->setItemsHandled($batch_num);
           $batch_num++;
           $batch_num++;
 
 
@@ -2112,7 +2469,7 @@ class GFF3Importer extends TripalImporter {
    */
    */
   private function getCvtermID($type, $cv_id = NULL, $is_prop_type = FALSE) {
   private function getCvtermID($type, $cv_id = NULL, $is_prop_type = FALSE) {
     if (!isset($cv_id)) {
     if (!isset($cv_id)) {
-      $cv_id = $this->sequence_cv_id;
+      $cv_id = $this->feature_cv->getValue('cv_id');
     }
     }
     if ($is_prop_type and array_key_exists($type, $this->featureprop_cvterm_lookup)) {
     if ($is_prop_type and array_key_exists($type, $this->featureprop_cvterm_lookup)) {
       return $this->featureprop_cvterm_lookup[$type];
       return $this->featureprop_cvterm_lookup[$type];