Browse Source

GFF3 loader: Added file caching to reduce memory requirements. Added insert to analysisfeature table

Stephen Ficklin 4 years ago
parent
commit
dbedaf88b0
1 changed files with 275 additions and 236 deletions
  1. 275 236
      tripal_chado/includes/TripalImporter/GFF3Importer.inc

+ 275 - 236
tripal_chado/includes/TripalImporter/GFF3Importer.inc

@@ -43,6 +43,16 @@ class GFF3Importer extends TripalImporter {
    */
   public static $button_text = 'Import GFF3 file';
 
+  /**
+   * A handle to a temporary file for caching the GFF features. This allows for
+   * quick lookup of parsed features without having to store it in RAM.
+   */
+  private $gff_cache_file = NULL;
+
+  /**
+   * The name of the temporary cache file.
+   */
+  private $gff_cache_file_name = NULL;
 
   /**
    * The lines from the ##sequence-region at the top of the GFF
@@ -555,81 +565,95 @@ class GFF3Importer extends TripalImporter {
       }
     }
 
+    // Create the cache file for storing parsed GFF entries.
+    $this->openCacheFile();
+
     // Load the GFF3.
-    $this->logMessage("Step 1: Preloading GFF3 file...                       ");
-    $this->parseGFF3();
+    try {
+      $this->logMessage("Step 1: Caching GFF3 file...                             ");
+      $this->parseGFF3();
 
-    // Prep the database for necessary records.
-    $this->prepSynonms();
-    $this->prepNullPub();
-    $this->prepDBs();
+      // Prep the database for necessary records.
+      $this->prepSynonms();
+      $this->prepNullPub();
+      $this->prepDBs();
 
-    $this->logMessage("Step 2: Load landmarks sequences...                       ");
-    $this->findLandmarks();
-    $this->insertLandmarks();
+      $this->logMessage("Step 2: Insert new landmarks sequences...                 ");
+      $this->findLandmarks();
+      $this->insertLandmarks();
 
-    $this->logMessage("Step 3: Find existing features...                         ");
-    $this->findFeatures();
+      $this->logMessage("Step 3: Find existing features...                         ");
+      $this->findFeatures();
 
-    $this->logMessage("Step 4: Prepare for any updates ...                       ");
-    $this->deleteFeatureData();
+      $this->logMessage("Step 4: Prepare for any updates ...                       ");
+      $this->deleteFeatureData();
 
-    $this->logMessage("Step 5: Loading !num_features features...                 ",
-        ['!num_features' => count(array_keys($this->features))]);
-    $this->insertFeatures();
+      $this->logMessage("Step 5: Processing !num_features features...             ",
+          ['!num_features' => count(array_keys($this->features))]);
+      $this->insertFeatures();
 
-    $this->logMessage("Step 6: Get new feature IDs...                            ");
-    $this->findFeatures();
+      $this->logMessage("Step 6: Get new feature IDs...                           ");
+      $this->findFeatures();
 
-    $this->logMessage("Step 7: Loading locations...                              ");
-    $this->insertFeatureLocs();
+      $this->logMessage("Step 7: Insert locations...                                ");
+      $this->insertFeatureLocs();
 
-    $this->logMessage("Step 8: Loading properties...                             ");
-    $this->insertFeatureProps();
+      $this->logMessage("Step 8: Insert properties...                               ");
+      $this->insertFeatureProps();
 
-    $this->logMessage("Step 9: Finding synonyms (aliases)...                     ");
-    $this->findSynonyms();
+      $this->logMessage("Step 9: Find synonyms (aliases)...                       ");
+      $this->findSynonyms();
+
+      $this->logMessage("Step 10: Insert new synonyms (aliases)...                  ");
+      $this->insertSynonyms();
 
-    $this->logMessage("Step 10: Loading synonsyms (aliases)...                   ");
-    $this->insertSynonyms();
+      $this->logMessage("Step 11: Insert feature synonyms (aliases)...              ");
+      $this->insertFeatureSynonyms();
 
-    $this->logMessage("Step 11: Loading feature synonyms (aliases)...            ");
-    $this->insertFeatureSynonyms();
+      $this->logMessage("Step 12: Find cross references...                        ");
+      $this->findDbxrefs();
 
-    $this->logMessage("Step 12: Finding existing cross references...             ");
-    $this->findDbxrefs();
+      $this->logMessage("Step 13: Insert new cross references...                    ");
+      $this->insertDbxrefs();
 
-    $this->logMessage("Step 13: Loading cross references...                      ");
-    $this->insertDbxrefs();
+      $this->logMessage("Step 14: Get new cross references IDs...                 ");
+      $this->findDbxrefs();
 
-    $this->logMessage("Step 14: Retrieving loaded cross references...            ");
-    $this->findDbxrefs();
+      $this->logMessage("Step 15: Insert feature cross references...                ");
+      $this->insertFeatureDbxrefs();
 
-    $this->logMessage("Step 15: Loading feature cross references...              ");
-    $this->insertFeatureDbxrefs();
+      $this->logMessage("Step 16: Insert feature ontology terms...                  ");
+      $this->insertFeatureCVterms();
 
-    $this->logMessage("Step 16: Loading feature ontology terms...                ");
-    $this->insertFeatureCVterms();
+      $this->logMessage("Step 17: Add child-parent relationships...               ");
+      $this->findChildRanks();
+      $this->insertFeatureParents();
 
-    $this->logMessage("Step 17: Associate child-parent relationships...          ");
-    $this->findChildRanks();
-    $this->insertFeatureParents();
+      $this->logMessage("Step 18: Insert 'derives_from' relationships...            ");
+      $this->insertFeatureDerivesFrom();
 
-    $this->logMessage("Step 18: Loading 'derives_from' relationships...          ");
-    $this->insertFeatureDerivesFrom();
+      $this->logMessage("Step 19: Insert Targets...                                 ");
+      // TODO: Target (target_organism & target_type)
 
-    $this->logMessage("Step 19: Loading Targets...                               ");
-    // TODO: Target (target_organism & target_type)
+      $this->logMessage("Step 20: Add any missing proteins...                     ");
+      // TODO: protein records.
 
-    $this->logMessage("Step 20: Adding any missing proteins...                   ");
-    // TODO: protein records.
+      // TODO: handle is_circular (it may just need to be a property).
 
-    // TODO: handle is_circular (it may just need to be a property).
+      $this->logMessage("Step 21: Associate features with analysis....             ");
+      $this->insertFeatureAnalysis();
 
-    if (!empty($this->residue_index)) {
-      $this->logMessage("Step 21: Adding sequences if available...                 ");
-      $this->insertFeatureSeqs();
+      if (!empty($this->residue_index)) {
+        $this->logMessage("Step 22: Adding sequences if available...              ");
+        //$this->insertFeatureSeqs();
+      }
+    }
+    // On exception, catch the error, clean up the cache file and rethrow
+    catch (Exception $e) {
+      $this->closeCacheFile();
+      throw $e;
     }
+
   }
 
 
@@ -674,24 +698,19 @@ class GFF3Importer extends TripalImporter {
     ]);
     $cvterm_id = $result->fetchField();
 
-    // If the term couldn't be found and it's a property term then insert it.
+    // If the term couldn't be found and it's a property term then insert it
+    // as a local term.
     if (!$cvterm_id) {
-      if($is_prop_type) {
-        $term = [
-          'id' => "local:$type",
-          'name' => $type,
-          'is_obsolete' => 0,
-          'cv_name' => $cv->getValue('name'),
-          'db_name' => 'local',
-          'is_relationship' => FALSE,
-        ];
-        $cvterm = (object) chado_insert_cvterm($term, ['update_existing' => FALSE]);
-        $cvterm_id = $cvterm->cvterm_id;
-      }
-      else {
-        throw new Exception(t('The CVterm, "!term", cannot be found in the vocabulary: "!cv_name".',
-            ['!term' => $type, '!cv_name' => $cv->getValue('name')]));
-      }
+      $term = [
+        'id' => "local:$type",
+        'name' => $type,
+        'is_obsolete' => 0,
+        'cv_name' => $cv->getValue('name'),
+        'db_name' => 'local',
+        'is_relationship' => FALSE,
+      ];
+      $cvterm = (object) chado_insert_cvterm($term, ['update_existing' => FALSE]);
+      $cvterm_id = $cvterm->cvterm_id;
     }
 
     if ($is_prop_type) {
@@ -932,8 +951,8 @@ class GFF3Importer extends TripalImporter {
         continue;
       }
       if (!preg_match('/^[^\=]+\=.+$/', $attr)) {
-        throw new Exception(t('Attribute is not correctly formatted on line %line_num: %attr',
-            ['%line_num' => $this->current_line, '%attr' => $attr]));
+        throw new Exception(t('Attribute is not correctly formatted on line !line_num: !attr',
+            ['!line_num' => $this->current_line, '!attr' => $attr]));
       }
 
       // Break apart each attribute into key/value pairs.
@@ -1214,10 +1233,19 @@ class GFF3Importer extends TripalImporter {
    * Loads a single landmark by name.
    */
   private function insertLandmark($name) {
-
-    $landmark = $this->insertFeature($this->organism, $this->analysis, $this->landmark_cvterm, $name,
-        $name, '', 'f', 'f', 1, 0);
-    $this->landmarks[$name] = $landmark->getValue('feature_id');
+    // Build the feature record directly. Both the uniquename and the name of
+    // the landmark are set to $name and its type is the landmark cvterm.
+    $feature = new ChadoRecord('feature');
+    // No residues are supplied for the landmark here, so the checksum stored
+    // below is the md5 of an empty string.
+    $residues = '';
+    $feature->setValues([
+      'organism_id' => $this->organism->getValue('organism_id'),
+      'uniquename' => $name,
+      'name' => $name,
+      'type_id' => $this->landmark_cvterm->getValue('cvterm_id'),
+      'md5checksum' => md5($residues),
+      'is_analysis' => FALSE,
+      'is_obsolete' => FALSE,
+    ]);
+    $feature->insert();
+    // Remember the ID reported by the inserted record, keyed by landmark name.
+    $this->landmarks[$name] = $feature->getID();
   }
 
   /**
@@ -1268,7 +1296,6 @@ class GFF3Importer extends TripalImporter {
 
       // Parse this feature from this line of the GFF3 file.
       $gff_feature = $this->parseFeature($line);
-      $gff_feature['feature_id'] = NULL;
 
       // A feature may get ignored. But let's default this to FALSE.
       $gff_feature['skipped'] = FALSE;
@@ -1298,6 +1325,7 @@ class GFF3Importer extends TripalImporter {
           $this->dbxref_lookup[$index] = $info;
         }
       }
+
       // We want to make sure the Ontology_term attribute dbxrefs are
       // also easily looked up... but we do not want to create them
       // if they do not exist the precense of the 'cvterm' key will
@@ -1313,7 +1341,6 @@ class GFF3Importer extends TripalImporter {
         }
       }
 
-
       // Organize the CVterms for faster access later on.
       if (!array_key_exists($gff_feature['type'], $feature_cvterms)) {
         $feature_cvterms[$gff_feature['type']] = 0;
@@ -1329,11 +1356,10 @@ class GFF3Importer extends TripalImporter {
         $featureprop_cvterms[$prop_name]++;
       }
 
-      // Store the GFF feature details.
+      // Cache the GFF feature details for later lookup.
       if ($gff_feature['uniquename'] != $gff_feature['landmark']) {
-        $this->features[$gff_feature['uniquename']] = $gff_feature;
+        $this->cacheFeature($gff_feature);
       }
-
     }
 
     // Iterate through the feature type terms and get a chado object for each.
@@ -1348,6 +1374,47 @@ class GFF3Importer extends TripalImporter {
     }
   }
 
+  /**
+   * Creates the temporary cache file and opens it for read/write access.
+   *
+   * The file is created with drupal_tempnam() under 'temporary://'. Its
+   * resolved path is stored in $this->gff_cache_file_name and the open
+   * handle in $this->gff_cache_file.
+   *
+   * @throws Exception
+   *   If the temporary file cannot be created, resolved or opened.
+   */
+  private function openCacheFile() {
+    $temp_file = drupal_tempnam('temporary://', "TripalGFF3Import_");
+    if ($temp_file === FALSE) {
+      throw new Exception(t('Could not create a temporary file for caching the GFF3 features.'));
+    }
+
+    $this->gff_cache_file_name = drupal_realpath($temp_file);
+    if (!$this->gff_cache_file_name) {
+      $this->gff_cache_file_name = NULL;
+      throw new Exception(t('Could not resolve the path of the temporary cache file: !cfile',
+          ['!cfile' => $temp_file]));
+    }
+    $this->logMessage("Opening temporary cache file: !cfile",
+        ['!cfile' => $this->gff_cache_file_name]);
+
+    // fopen() returns FALSE on failure. Fail here rather than letting every
+    // later read or write on the handle break.
+    $this->gff_cache_file = fopen($this->gff_cache_file_name, "r+");
+    if ($this->gff_cache_file === FALSE) {
+      $this->gff_cache_file = NULL;
+      throw new Exception(t('Could not open the temporary cache file: !cfile',
+          ['!cfile' => $this->gff_cache_file_name]));
+    }
+  }
+
+  /**
+   * Closes and cleans up the cache file.
+   *
+   * Safe to call when the cache file was never opened or has already been
+   * closed: the handle and the file name are checked before use and both
+   * properties are reset to NULL afterwards.
+   */
+  private function closeCacheFile() {
+    // Only close a handle that is still an open stream.
+    if (is_resource($this->gff_cache_file)) {
+      fclose($this->gff_cache_file);
+    }
+    $this->gff_cache_file = NULL;
+
+    // Only remove the file if a name was recorded and the file still exists.
+    if ($this->gff_cache_file_name and file_exists($this->gff_cache_file_name)) {
+      $this->logMessage("Removing temporary cache file: !cfile",
+          ['!cfile' => $this->gff_cache_file_name]);
+      unlink($this->gff_cache_file_name);
+    }
+    $this->gff_cache_file_name = NULL;
+  }
+
+  /**
+   * Caches the processed feature from a GFF3 file.
+   *
+   * The feature array is serialized and appended to the cache file as a
+   * length-prefixed record: one line holding the byte length of the
+   * serialized data, then the data itself, then a newline. A length prefix is
+   * used because serialize() does not escape newlines, so a value containing
+   * one would break a purely line-based record.
+   *
+   * Only the offset of the record ('findex') and a 'feature_id' placeholder
+   * are kept in $this->features, keyed by the feature's uniquename.
+   *
+   * @param array $gff_feature
+   *   The parsed feature. Must contain a 'uniquename' key.
+   */
+  private function cacheFeature($gff_feature) {
+    // Always append: an earlier getCachedFeature() call may have left the
+    // file pointer somewhere other than the end of the file.
+    fseek($this->gff_cache_file, 0, SEEK_END);
+    $findex = ftell($this->gff_cache_file);
+    $record = serialize($gff_feature);
+    fwrite($this->gff_cache_file, strlen($record) . "\n" . $record . "\n");
+    $this->features[$gff_feature['uniquename']]['findex'] = $findex;
+    $this->features[$gff_feature['uniquename']]['feature_id'] = NULL;
+  }
+
+  /**
+   * Retrieves a feature using its index from the cache file.
+   *
+   * @param int $findex
+   *   The offset in the cache file at which the record starts, as stored by
+   *   cacheFeature() in $this->features[<uniquename>]['findex'].
+   *
+   * @return array
+   *   The feature array that was passed to cacheFeature().
+   *
+   * @throws Exception
+   *   If no valid record can be read at the given offset.
+   */
+  private function getCachedFeature($findex) {
+    fseek($this->gff_cache_file, $findex);
+
+    // The first line of the record is the byte length of the serialized data.
+    $header = fgets($this->gff_cache_file);
+    if ($header === FALSE) {
+      throw new Exception(t('Could not read the cached feature at offset !findex.',
+          ['!findex' => $findex]));
+    }
+    $length = (int) trim($header);
+
+    // Read exactly that many bytes, so newlines inside the data are harmless.
+    $record = stream_get_contents($this->gff_cache_file, $length);
+    $feature = ($record === FALSE) ? FALSE : unserialize($record);
+    if ($feature === FALSE) {
+      throw new Exception(t('The cached feature at offset !findex is corrupt.',
+          ['!findex' => $findex]));
+    }
+    return $feature;
+  }
+
   /**
    * Imports the landmark features into Chado.
    */
@@ -1393,17 +1460,21 @@ class GFF3Importer extends TripalImporter {
     $batch_num = 1;
     $sql = '';
     $args = [];
-    foreach ($this->features as $feature) {
+    foreach ($this->features as $uniquename => $info) {
+      $findex = $info['findex'];
+      $feature_id = $info['feature_id'];
+      $feature = $this->getCachedFeature($findex);
+
       $total++;
+      $i++;
 
       // Only do an insert if this feature doesn't already exist in the databse.
-      if (!$feature['feature_id'] and !$feature['skipped']) {
-        $i++;
+      if (!$feature_id and !$feature['skipped']) {
         $residues = $this->getResidues($feature, FALSE);
         $type_id = $this->feature_cvterm_lookup[strtolower($feature['type'])];
         $sql .= "(:uniquename_$i, :name_$i, :type_id_$i, :organism_id_$i, :residues_$i, " .
                " :md5checksum_$i, :seqlen_$i, FALSE, FALSE),\n";
-        $args[":uniquename_$i"] = $feature['uniquename'];
+        $args[":uniquename_$i"] = $uniquename;
         $args[":name_$i"] = $feature['name'];
         $args[":type_id_$i"] = $type_id;
         $args[":organism_id_$i"] = $feature['organism'] ? $feature['organism'] : $this->organism->getID();
@@ -1446,10 +1517,11 @@ class GFF3Importer extends TripalImporter {
     $total = 0;
     $batch_num = 1;
     $names = [];
-    foreach ($this->features as $uniquename => $feature) {
+    foreach ($this->features as $uniquename => $info) {
+      $feature_id = $info['feature_id'];
       $total++;
 
-      if (!$feature['feature_id']) {
+      if (!$feature_id) {
         $i++;
         $names[] = $uniquename;
       }
@@ -1460,11 +1532,10 @@ class GFF3Importer extends TripalImporter {
           $args = [':uniquenames' => $names];
           $results = chado_query($sql, $args);
           while ($f = $results->fetchObject()) {
-
-            $matched_feature = $this->features[$f->uniquename];
+            $matched_findex = $this->features[$f->uniquename]['findex'];
+            $matched_feature = $this->getCachedFeature($matched_findex);
             $matched_type_id = $this->feature_cvterm_lookup[strtolower($matched_feature['type'])];
             $matched_organism_id = $matched_feature['organism'] ? $matched_feature['organism'] : $this->organism->getID();
-
             if ($matched_type_id == $f->type_id and $matched_organism_id == $f->organism_id) {
               $this->features[$f->uniquename]['feature_id'] = $f->feature_id;
             }
@@ -1496,16 +1567,21 @@ class GFF3Importer extends TripalImporter {
     $sql4 = "DELETE from {feature_dbxref} WHERE feature_id IN (:feature_ids)";
     $sql5 = "DELETE from {feature_synonym} WHERE feature_id IN (:feature_ids)";
     $sql6 = "DELETE from {feature_relationship} WHERE subject_id IN (:feature_ids)";
+    $sql7 = "DELETE from {analysisfeature} WHERE feature_id IN (:feature_ids)";
     $i = 0;
     $total = 0;
     $batch_num = 1;
     $feature_ids = [];
-    foreach ($this->features as $feature) {
+    foreach ($this->features as $uniquename => $info) {
+      $findex = $info['findex'];
+      $feature_id = $info['feature_id'];
+      $feature = $this->getCachedFeature($findex);
+
       $total++;
+      $i++;
 
-      if ($feature['feature_id'] and !$feature['skipped']) {
-        $i++;
-        $feature_ids[] = $feature['feature_id'];
+      if ($feature_id and !$feature['skipped']) {
+        $feature_ids[] = $feature_id;
       }
 
       // If we've reached the size of the batch then let's do the insert.
@@ -1518,6 +1594,7 @@ class GFF3Importer extends TripalImporter {
           chado_query($sql4, $args);
           chado_query($sql5, $args);
           chado_query($sql6, $args);
+          chado_query($sql7, $args);
         }
         $this->setItemsHandled($batch_num);
         $batch_num++;
@@ -1547,7 +1624,11 @@ class GFF3Importer extends TripalImporter {
     $batch_num = 1;
     $sql = '';
     $args = [];
-    foreach ($this->features as $feature) {
+    foreach ($this->features as $uniquename => $info) {
+      $findex = $info['findex'];
+      $feature_id = $info['feature_id'];
+      $feature = $this->getCachedFeature($findex);
+
       $total++;
 
       // If the feature is not skipped
@@ -1559,14 +1640,8 @@ class GFF3Importer extends TripalImporter {
           foreach ($values as $rank => $value) {
             $j++;
             $type_id = $this->featureprop_cvterm_lookup[strtolower($prop_name)];
-            if (!$type_id) {
-              print $prop_name . "!!!!!!!!!!!!!!!!!!!!\n";
-              print_r($this->featureprop_cvterm_lookup);
-              print_r($feature['properties']);
-              exit;
-            }
             $sql .= "(:feature_id_$j, :type_id_$j, :value_$j, :rank_$j),\n";
-            $args[":feature_id_$j"] = $feature['feature_id'];
+            $args[":feature_id_$j"] = $feature_id;
             $args[":type_id_$j"] = $type_id;
             $args[":value_$j"] = $value;
             $args[":rank_$j"] = $rank;
@@ -1614,17 +1689,22 @@ class GFF3Importer extends TripalImporter {
     $args = [];
     foreach ($this->parent_lookup as $parent => $children) {
       $total++;
+      $i++;
 
-      $parent_feature = $this->features[$parent];
+      $parent_feature = $this->getCachedFeature($this->features[$parent]['findex']);
+      $parent_uniquename = $parent_feature['uniquename'];
+      $parent_feature_id = $this->features[$parent_uniquename]['feature_id'];
       if (!$parent_feature['skipped']) {
-        $i++;
 
         $rank = 0;
-        foreach ($children as $feature_id) {
+        foreach ($children as $child_findex) {
           $j++;
+          $child_feature = $this->getCachedFeature($child_findex);
+          $child_uniquename = $child_feature['uniquename'];
+          $child_feature_id = $this->features[$child_uniquename]['feature_id'];
           $sql .= "(:subject_id_$j, :object_id_$j, :type_id_$j, :rank_$j),\n";
-          $args[":subject_id_$j"] = $feature_id;
-          $args[":object_id_$j"] = $this->features[$parent]['feature_id'];
+          $args[":subject_id_$j"] = $child_feature_id;
+          $args[":object_id_$j"] = $parent_feature_id;
           $args[":type_id_$j"] = $type_id;
           $args[":rank_$j"] = $rank;
           $rank++;
@@ -1715,12 +1795,13 @@ class GFF3Importer extends TripalImporter {
    */
   private function findChildRanks() {
     // Iterate through parent-child relationships and set the ranks.
-    foreach ($this->features as $uniquename => $feature) {
+    foreach ($this->features as $uniquename => $info) {
+      // $this->features only holds the cache offset and feature_id, so the
+      // full feature has to be read back from the cache file.
+      $feature = $this->getCachedFeature($info['findex']);
       if ($feature['parent']) {
         // place features in order that they appear by their start coordinates.
         $parent = $feature['parent'];
         $start = $feature['start'];
-        $this->parent_lookup[$parent][$start] = $feature['feature_id'];
+        // Store the child's cache offset (not its feature_id), keyed by its
+        // start coordinate under the parent.
+        // NOTE(review): siblings sharing the same start coordinate overwrite
+        // one another in this lookup -- confirm whether that can occur.
+        $this->parent_lookup[$parent][$start] = $info['findex'];
       }
     }
   }
@@ -1838,7 +1919,10 @@ class GFF3Importer extends TripalImporter {
     $batch_num = 1;
     $sql = '';
     $args = [];
-    foreach ($this->features as $feature) {
+    foreach ($this->features as $uniquename => $info) {
+      $findex = $info['findex'];
+      $feature_id = $info['feature_id'];
+      $feature = $this->getCachedFeature($findex);
       $total++;
 
       // If the feature is not skipped
@@ -1846,10 +1930,10 @@ class GFF3Importer extends TripalImporter {
         $i++;
 
         // Iterate through all of the dbxrefs of this feature.
-        foreach ($feature['dbxrefs'] as $index => $info) {
+        foreach ($feature['dbxrefs'] as $index => $details) {
           $j++;
           $sql .= "(:feature_id_$j, :dbxref_id_$j),\n";
-          $args[":feature_id_$j"] = $feature['feature_id'];
+          $args[":feature_id_$j"] = $feature_id;
           $args[":dbxref_id_$j"] = $this->dbxref_lookup[$index]['dbxref_id'];
         }
       }
@@ -1893,7 +1977,11 @@ class GFF3Importer extends TripalImporter {
     $batch_num = 1;
     $sql = '';
     $args = [];
-    foreach ($this->features as $feature) {
+    foreach ($this->features as $uniquename => $info) {
+      $findex = $info['findex'];
+      $feature_id = $info['feature_id'];
+      $feature = $this->getCachedFeature($findex);
+
       $total++;
 
       // If the feature is not skipped
@@ -1904,7 +1992,7 @@ class GFF3Importer extends TripalImporter {
         foreach ($feature['terms'] as $index => $info) {
           $j++;
           $sql .= "(:feature_id_$j, :cvterm_id_$j, :pub_id_$j),\n";
-          $args[":feature_id_$j"] = $feature['feature_id'];
+          $args[":feature_id_$j"] = $feature_id;
           $args[":cvterm_id_$j"] = $this->cvterm_lookup[$index];
           $args[":pub_id_$j"] = $this->null_pub->pub_id;
         }
@@ -1950,15 +2038,19 @@ class GFF3Importer extends TripalImporter {
     $batch_num = 1;
     $sql = '';
     $args = [];
-    foreach ($this->features as $feature) {
+    foreach ($this->features as $uniquename => $info) {
+      $findex = $info['findex'];
+      $feature_id = $info['feature_id'];
+      $feature = $this->getCachedFeature($findex);
+
       $total++;
+      $i++;
 
       // If the feature is not skipped
       if (!$feature['skipped'] and $feature['derives_from']) {
-        $i++;
         $object_id = $this->features[$feature['derives_from']]['feature_id'];
         $sql .= "(:subject_id_$i, :object_id_$i, :type_id_$i, 0),\n";
-        $args[":subject_id_$i"] = $feature['feature_id'];
+        $args[":subject_id_$i"] = $feature_id;
         $args[":object_id_$i"] = $object_id;
         $args[":type_id_$i"] = $type_id;
       }
@@ -2002,7 +2094,10 @@ class GFF3Importer extends TripalImporter {
     $batch_num = 1;
     $sql = '';
     $args = [];
-    foreach ($this->features as $feature) {
+    foreach ($this->features as $info) {
+      $findex = $info['findex'];
+      $feature_id = $info['feature_id'];
+      $feature = $this->getCachedFeature($findex);
 
       $total++;
 
@@ -2013,20 +2108,22 @@ class GFF3Importer extends TripalImporter {
         // Get the rank of this feature by iterating through all siblings of the
         // parent and finding where this feature is in terms of start position.
         $rank = 0;
-        if (array_key_exists('Parent', $feature)) {
-          $children = $this->parent_lookup[$feature['parent']];
-          foreach ($children as $sib_start => $feature_id) {
-            if ($sib_start == $feature['start']) {
-              break;
+        if (array_key_exists('parent', $feature)) {
+          $children_start = $this->parent_lookup[$feature['parent']];
+          if (is_array($children_start)) {
+            foreach (array_keys($children_start) as $sib_start) {
+              if ($sib_start == $feature['start']) {
+                break;
+              }
+              $rank++;
             }
-            $rank++;
           }
         }
 
         $sql .= "(:srcfeature_id_$i, :feature_id_$i, :fmin_$i, :fmax_$i," .
                 " :strand_$i, :phase_$i, :rank_$i),\n";
         $args[":srcfeature_id_$i"] = $this->landmarks[$feature['landmark']];
-        $args[":feature_id_$i"] = $feature['feature_id'];
+        $args[":feature_id_$i"] = $feature_id;
         $args[":fmin_$i"] = $feature['start'];
         $args[":fmax_$i"] = $feature['stop'];
         $args[":strand_$i"] = $feature['strand'];
@@ -2108,13 +2205,18 @@ class GFF3Importer extends TripalImporter {
     $sql = '';
     $args = [];
     $batch_synonyms = [];
-    foreach ($this->features as $uniquename => $feature) {
+    foreach ($this->features as $uniquename => $info) {
+      $findex = $info['findex'];
+      $feature = $this->getCachedFeature($findex);
+
       $i++;
       $total++;
 
       // Get all of the synonyms for this batch.
-      foreach ($feature['synonyms'] as $index => $synonym) {
-        $batch_synonyms[] = $synonym;
+      if (array_key_exists('synonyms', $feature)) {
+        foreach ($feature['synonyms'] as $index => $synonym) {
+          $batch_synonyms[] = $synonym;
+        }
       }
 
       // If we've reached the size of the batch then let's do the select
@@ -2219,7 +2321,11 @@ class GFF3Importer extends TripalImporter {
     $total = 0;
     $batch_num = 1;
     $args = [];
-    foreach ($this->features as $feature) {
+    foreach ($this->features as $uniquename => $info) {
+      $findex = $info['findex'];
+      $feature_id = $info['feature_id'];
+      $feature = $this->getCachedFeature($findex);
+
       $total++;
 
       // If the feature is not skipped
@@ -2230,9 +2336,8 @@ class GFF3Importer extends TripalImporter {
         foreach (array_unique($feature['synonyms']) as $synonym) {
           $j++;
           $sql .= "(:synonym_id_$j, :feature_id_$j, :pub_id_$j),\n";
-          $synonym_id = $this->synonym_lookup[$synonym];
           $args[":synonym_id_$j"] = $this->synonym_lookup[$synonym];
-          $args[":feature_id_$j"] = $feature['feature_id'];
+          $args[":feature_id_$j"] = $feature_id;
           $args[":pub_id_$j"] = $this->null_pub->pub_id;
         }
       }
@@ -2447,124 +2552,58 @@ class GFF3Importer extends TripalImporter {
   }
 
   /**
-   * Create the feature record & link it to it's analysis
-   *
-   * @param $organism
-   * @param $analysis_id
-   * @param $cvterm
-   * @param $uniquename
-   * @param $name
-   * @param $residues
-   * @param $is_analysis
-   * @param $is_obsolete
-   * @param $add_only
-   * @param $score
-   *
-   * @ingroup gff3_loader
+   * Associates the loaded features with the analysis.
+   *
+   * Iterates over $this->features and, for every feature that is not marked
+   * as skipped, adds a row to the analysisfeature table linking the feature
+   * to $this->analysis. The GFF score is stored as the significance, or NULL
+   * when the score is '.'. Rows are inserted in batches of 1000 features.
    */
-  private function insertFeature($organism, $analysis, $cvterm, $uniquename,
-                                 $name, $residues, $is_analysis = 'f', $is_obsolete = 'f', $add_only, $score) {
+  private function insertFeatureAnalysis() {
+    $batch_size = 1000;
+    $num_features = count(array_keys($this->features));
+    $num_batches = (int) ($num_features / $batch_size) + 1;
 
-    if (strcmp($is_obsolete, 'f') == 0 or $is_obsolete == 0) {
-       $is_obsolete = 'FALSE';
-    }
-    if (strcmp($is_obsolete, 't') == 0 or $is_obsolete == 1) {
-       $is_obsolete = 'TRUE';
-    }
-    if (strcmp($is_analysis, 'f') == 0 or $is_analysis == 0) {
-       $is_analysis = 'FALSE';
-    }
-    if (strcmp($is_analysis, 't') == 0 or $is_analysis == 1) {
-       $is_analysis = 'TRUE';
-    }
+    $this->setItemsHandled(0);
+    $this->setTotalItems($num_batches);
 
-    // Check to see if the feature already exists.
-    $feature = new ChadoRecord('feature');
-    $feature->setValues([
-       'organism_id' => $organism->getValue('organism_id'),
-       'uniquename' => $uniquename,
-       'type_id' => $cvterm->getValue('cvterm_id'),
-    ]);
-    $num_matches = $feature->find();
+    $init_sql = "INSERT INTO {analysisfeature} (feature_id, analysis_id, significance) VALUES \n";
+    $i = 0;
+    $total = 0;
+    $batch_num = 1;
+    // The VALUES list must start out empty; it is appended to with .= below.
+    $sql = '';
+    $args = [];
+    foreach ($this->features as $info) {
+      $findex = $info['findex'];
+      $feature_id = $info['feature_id'];
+      $feature = $this->getCachedFeature($findex);
 
+      $i++;
+      $total++;
 
-    // Insert the feature if it does not exist otherwise perform an update.
-    if ($num_matches == 0) {
-      $feature->setValue('name', $name);
-      $feature->setValue('md5checksum', md5($residues));
-      $feature->setValue('is_analysis', $is_analysis);
-      $feature->setValue('is_obsolete', $is_obsolete);
-      try {
-        $feature->insert();
-      }
-      catch (Exception $e) {
-        $this->logMessage("Failed to insert feature '$uniquename' (" . $cvterm->getValue('name') . ").", [], TRIPAL_WARNING);
-        return 0;
-      }
-    }
-    elseif (!$add_only) {
-      if ($num_matches > 1) {
-        $this->logMessage("Failed to update feature '$uniquename' (" . $cvterm->getValue('name') . "). More than one feature exists with these criteria", [], TRIPAL_WARNING);
-        return 0;
-      }
-      $feature->setValue('name', $name);
-      $feature->setValue('md5checksum', md5($residues));
-      $feature->setValue('is_analysis', $is_analysis);
-      $feature->setValue('is_obsolete', $is_obsolete);
-      try {
-        $feature->update();
-      }
-      catch (Exception $e) {
-        $this->logMessage("Failed to update feature '$uniquename' (" . $cvterm->getValue('name') . ").", [], TRIPAL_WARNING);
-        return 0;
+      // If the feature is not skipped then add it to the table
+      if (!$feature['skipped']) {
+        $sql .= "(:feature_id_$i, :analysis_id_$i, :significance_$i),\n";
+        $args[":feature_id_$i"] = $feature_id;
+        $args[":analysis_id_$i"] = $this->analysis->getID();
+        // A score of '.' means no score was given, so store NULL instead.
+        if (strcmp($feature['score'], '.') != 0) {
+          $args[":significance_$i"] = $feature['score'];
+        }
+        else {
+          $args[":significance_$i"] = NULL;
+        }
       }
-    }
-    else {
-      // The feature exists and we don't want to update it so return
-      // a value of 0.  This will stop all downstream property additions
-      return $feature;
-    }
 
-    // Add the analysisfeature entry to the analysisfeature table if
-    // it doesn't already exist.
-    $af = new ChadoRecord('analysisfeature');
-    $af->setValues([
-        'analysis_id' => $analysis->getValue('analysis_id'),
-        'feature_id' => $feature->getID(),
-    ]);
-    $num_afs = $af->find();
-    if ($num_afs == 0) {
-      // if a score is available then set that to be the significance field
-      if (strcmp($score, '.') != 0) {
-        $af->setValue('significance', $score);
-      }
-      try {
-        $af->insert();
-      }
-      catch (Exception $e) {
-        $this->logMessage("Could not add analysisfeature record: " . $analysis->getValue('analysis_id') . ", " .  $feature->getID() . ". " . $e->getMessage(), [], TRIPAL_WARNING);
-      }
-    }
-    else {
-      // if a score is available then set that to be the significance field
-      $new_vals = [];
-      if (strcmp($score, '.') != 0) {
-        $af->setValue('significance', $score);
-      }
-      else {
-        $af->setValue('significance', '__NULL__');
-      }
-      if (!$add_only) {
-        try {
-          $af->update();
-        }
-        catch (Exception $e) {
-          $this->logMessage("Could not update analysisfeature record: $analysis_id, " . $feature->getID() . ". " . $e->getMessage(), [], TRIPAL_WARNING);
+      // If we've reached the size of the batch then let's do the insert.
+      if ($i == $batch_size or $total == $num_features) {
+        // A batch made up only of skipped features has nothing to insert.
+        if (count($args) > 0) {
+          $sql = rtrim($sql, ",\n");
+          $sql = $init_sql . $sql;
+          chado_query($sql, $args);
         }
+        $this->setItemsHandled($batch_num);
+        $batch_num++;
+
+        // Now reset all of the variables for the next batch.
+        $sql = '';
+        $i = 0;
+        $args = [];
       }
     }
-
-    return $feature;
   }
 
   /**