Browse Source

2 additional unit tests and corresponding gff files related to ID validations for unescaped whitespaces and IDs beginning with right arrow character

Risharde Ramnath 4 years ago
parent
commit
5e98649061

+ 4 - 0
tests/tripal_chado/data/gff_rightarrow_id.gff

@@ -0,0 +1,4 @@
+##gff-version 3
+Contig0	FRAEX38873_v2	gene	16315	44054	.	+	.	ID=>FRAEX38873_v2_000000010;Name=FRAEX38873_v2_000000010;biotype=protein_coding
+Contig0	FRAEX38873_v2	mRNA	16315	44054	.	+	.	ID=FRAEX38873_v2_000000010.1;Parent=FRAEX38873_v2_000000010;Name=FRAEX38873_v2_000000010.1;biotype=protein_coding;AED=0.05
+Contig0	FRAEX38873_v2	polypeptide	16315	44054	.	+	.	ID=FRAEX38873_v2_000000010.1.3_test_protein;Parent=FRAEX38873_v2_000000010.1

+ 4 - 0
tests/tripal_chado/data/gff_unescaped_ids.gff

@@ -0,0 +1,4 @@
+##gff-version 3
+Contig0	FRAEX38873_v2	gene	16315	44054	.	+	.	ID=FRAEX38873_v2_000000010 SPACED;Name=FRAEX38873_v2_000000010;biotype=protein_coding
+Contig0	FRAEX38873_v2	mRNA	16315	44054	.	+	.	ID=FRAEX38873_v2_000000010.1;Parent=FRAEX38873_v2_000000010;Name=FRAEX38873_v2_000000010.1;biotype=protein_coding;AED=0.05
+Contig0	FRAEX38873_v2	polypeptide	16315	44054	.	+	.	ID=FRAEX38873_v2_000000010.1.3_test_protein;Parent=FRAEX38873_v2_000000010.1

+ 89 - 0
tests/tripal_chado/loaders/GFF3ImporterTest.php

@@ -51,6 +51,95 @@ class GFF3ImporterTest extends TripalTestCase {
     $this->assertEquals($name, $query);
   }
 
+  /**
+   * Run the GFF loader on gff_unescaped_ids.gff for testing.
+   *
+   * This tests whether the GFF loader detects invalid ID that contains  
+   * unescaped whitespaces. The GFF loader should throw an exception which this
+   * unit test detects.
+   */  
+  public function testGFFImporterUnescapedWhitespaceID() {
+    $gff_file = ['file_local' => __DIR__ . '/../data/gff_unescaped_ids.gff'];
+    $analysis = factory('chado.analysis')->create();
+    $organism = factory('chado.organism')->create();
+    $run_args = [
+      'analysis_id' => $analysis->analysis_id,
+      'organism_id' => $organism->organism_id,
+      'use_transaction' => 1,
+      'add_only' => 0,
+      'update' => 1,
+      'create_organism' => 0,
+      'create_target' => 0,
+      ///regexps for mRNA and protein.
+      're_mrna' => NULL,
+      're_protein' => NULL,
+      //optional
+      'target_organism_id' => NULL,
+      'target_type' => NULL,
+      'start_line' => NULL,
+      'landmark_type' => NULL,
+      'alt_id_attr' => NULL,
+    ];
+
+    $hasException = false;
+    try {    
+      $this->loadLandmarks($analysis, $organism);
+      // This will produce an exception due to unescaped whitespace in ID
+      $this->runGFFLoader($run_args, $gff_file);
+    }
+    catch(\Exception $ex) {
+      $hasException = true;
+    }
+
+    // We expect an exception to happen so we are looking for a return of true
+    $this->assertEquals($hasException, true);
+  }
+
+  /**
+   * Run the GFF loader on gff_rightarrow_ids.gff for testing.
+   *
+   * This tests whether the GFF loader detects invalid ID that contains  
+   * beginning arrow >. The GFF loader should throw an exception which this
+   * unit detects.
+   */  
+  public function testGFFImporterRightArrowID() {
+    $gff_file = ['file_local' => __DIR__ . '/../data/gff_rightarrow_id.gff'];
+    $analysis = factory('chado.analysis')->create();
+    $organism = factory('chado.organism')->create();
+    $run_args = [
+      'analysis_id' => $analysis->analysis_id,
+      'organism_id' => $organism->organism_id,
+      'use_transaction' => 1,
+      'add_only' => 0,
+      'update' => 1,
+      'create_organism' => 0,
+      'create_target' => 0,
+      ///regexps for mRNA and protein.
+      're_mrna' => NULL,
+      're_protein' => NULL,
+      //optional
+      'target_organism_id' => NULL,
+      'target_type' => NULL,
+      'start_line' => NULL,
+      'landmark_type' => NULL,
+      'alt_id_attr' => NULL,
+    ];
+
+    $hasException = false;
+    try {    
+      $this->loadLandmarks($analysis, $organism);
+      // This will produce an exception due to unescaped whitespace in ID
+      $this->runGFFLoader($run_args, $gff_file);
+    }
+    catch(\Exception $ex) {
+      $hasException = true;
+    }
+
+    // We expect an exception to happen so we are looking for a return of true
+    $this->assertEquals($hasException, true);
+  }
+
+
   /**
    * Run the GFF loader on gff_duplicate_ids.gff for testing.
    *

+ 6 - 0
tripal_chado/includes/TripalImporter/GFF3Importer.inc

@@ -2765,6 +2765,12 @@ class GFF3Importer extends TripalImporter {
     }
     else {
       $uniquename = $attrs['ID'][0];
+      if(preg_match('[\\][\s]', $uniquename) == 0 && preg_match('[\s]', $uniquename) == 1) {
+        throw new Exception(t("ID !uniquename has one or more unescaped whitespaces", ['!uniquename' => $uniquename]));
+      }
+      if(substr($uniquename, 0, 1) == ">") {
+        throw new Exception(t("ID !uniquename should not begin with a right arrow >", ['!uniquename' => $uniquename]));
+      }
       $name = $attrs['Name'][0];
     }