Преглед на файлове

2 validation checks for attribute tag name and attribute tag value plus unit test for tag name that contains a comma

Risharde Ramnath преди 4 години
родител
ревизия
2f66751d03

+ 4 - 0
tests/tripal_chado/data/gff_tag_unescaped_character.gff

@@ -0,0 +1,4 @@
+##gff-version 3
+Contig0	FRAEX38873_v2	gene	16315	44054	.	+	.	ID=FRAEX38873_v2_000000010;TES,T=TEST;Name=FRAEX38873_v2_000000010;biotype=protein_coding
+Contig0	FRAEX38873_v2	mRNA	16315	44054	.	+	.	ID=FRAEX38873_v2_000000010.1;Parent=FRAEX38873_v2_000000010;Name=FRAEX38873_v2_000000010.1;biotype=protein_coding;AED=0.05
+Contig0	FRAEX38873_v2	polypeptide	16315	44054	.	+	.	ID=FRAEX38873_v2_000000010.1.3_test_protein;Parent=FRAEX38873_v2_000000010.1

+ 45 - 0
tests/tripal_chado/loaders/GFF3ImporterTest.php

@@ -51,6 +51,51 @@ class GFF3ImporterTest extends TripalTestCase {
     $this->assertEquals($name, $query);
   }
 
+  /**
+   * Run the GFF loader on gff_tag_unescaped_character.gff for testing.
+   *
+   * This tests whether the GFF loader rejects an attribute tag name that
+   * contains a comma: running the loader on this file must throw an exception.
+   */  
+  public function testGFFImporterUnescapedTagWithComma() {
+    $gff_file = ['file_local' => __DIR__ . '/../data/gff_tag_unescaped_character.gff'];
+    $analysis = factory('chado.analysis')->create();
+    $organism = factory('chado.organism')->create();
+    $run_args = [
+      'analysis_id' => $analysis->analysis_id,
+      'organism_id' => $organism->organism_id,
+      'use_transaction' => 1,
+      'add_only' => 0,
+      'update' => 1,
+      'create_organism' => 0,
+      'create_target' => 0,
+      // Regular expressions for mRNA and protein, left unset in this test.
+      're_mrna' => NULL,
+      're_protein' => NULL,
+      // Optional arguments, all left unset in this test.
+      'target_organism_id' => NULL,
+      'target_type' => NULL,
+      'start_line' => NULL,
+      'landmark_type' => NULL,
+      'alt_id_attr' => NULL,
+    ];
+
+  
+    $this->loadLandmarks($analysis, $organism);
+    // The loader is expected to throw because a tag name contains a comma.
+    $hasException = false;
+    try {
+      $this->runGFFLoader($run_args, $gff_file);
+    }
+    catch (\Exception $ex) {
+      $hasException = true;
+    }
+
+    $this->assertEquals($hasException, true);
+
+  }
+
+
   /**
    * Run the GFF loader on gff_seqid_invalid_character.gff for testing.
    * Seqids seem to also be called landmarks within GFF loader.

+ 17 - 1
tripal_chado/includes/TripalImporter/GFF3Importer.inc

@@ -950,7 +950,7 @@ class GFF3Importer extends TripalImporter {
 
     // Landmark (seqid) validation checks based on GFF3 specifications 
     preg_match('/[a-zA-Z0-9\.:\^\*\$@!\+_\?-\|]*/', $ret['landmark'], $matches);
-    if($matches[0] != $ret['landmark']) {
+    if ($matches[0] != $ret['landmark']) {
       throw new Exception(t("Landmark/seqid !landmark contains invalid 
         characters. Only characters included in this regular expression is 
         allowed [a-zA-Z0-9.:^*$@!+_?-|]", 
@@ -1019,6 +1019,22 @@ class GFF3Importer extends TripalImporter {
       // Break apart each attribute into key/value pairs.
       $tag = preg_split("/=/", $attr, 2);
 
+      // Tag name validation checks based on GFF3 specifications 
+      preg_match('/.+[,=;].+/', $tag[0], $matches);
+      if (count($matches) > 0) {
+        throw new Exception(t('Attribute tag name !tagname contains invalid / 
+        unescaped characters', ['!tagname' => $tag[0]])
+        );
+      }
+
+      // Value validation checks based on GFF3 specifications
+      preg_match('/.+[,=;].+/', $tag[1], $matches);
+      if (count($matches) > 0) {
+        throw new Exception(t('Attribute value name !value contains invalid / 
+        unescaped characters', ['!value' => $tag[1]])
+        );
+      }
+
       // Multiple values of an attribute are separated by commas
       $tag_name = $tag[0];
       if (!array_key_exists($tag_name, $tags)) {