Browse Source

Merge pull request #738 from tripal/676_allow_no_regexp

676 allow no regexp for parent in FASTA importer
Stephen Ficklin 6 years ago
parent
commit
902724a22a

+ 4 - 0
tests/tripal_chado/data/two_prots.fasta

@@ -0,0 +1,4 @@
+>FRAEX38873_v2_000000010.1
+MDQNQFANELISSYFLQQWRHNSQTLTLNPTPSNSGTESDSARSDLEYEDEGEEFPTELDTVNSSGGFSVVGPGKLSVLYPNVNLHGHDVGVVHANCAAPSKRLLYYFEMYVKNAGAKGQIAIGFITSAFKVRRHPGWEANTYGYHGDDGLLYRGRGKGESFGPMYTTDDTKYTTGDTVGGGINYATQEFFFTKNGVVVGTVSKDVKSPVFPTVAVHSQGEEVTVNFGKDPFVFDIKAYEAEQRAIQQEKIDCISIPLDAGHGLVRSYLQHYGYEGTLEFFDMASKSTAPPISLVPENGFNEEDNVYAMNRRTLRELIRHGEIDETFAKLRELYPQIVQDDRSSICFLLHTQKFIELVRVGKLEEAVLYGRSEFEKFKRRSEFDDLVKDCAALLAYERPDNSSVGYLLRESQRELVADAVNAIILATNPNVKDPKCCLQSRLERLLRQLTACFLEKRSLNGGDGEAFHLRRILKSGKKG
+>FRAEX38873_v2_000000010.2
+MDQNQFANELISSYFLQQWRHNSQTLTLNPTPSNSGTESDSARSDLEYEDEGEEFPTELDTVNSSGGFSVVGPGKLSVLYPNVNLHGHDVGVVHANCAAPSKRLLYYFEMYVKNAGAKGQIAIGFITSAFKVRRHPGWEANTYGYHGDDGLLYRGRGKGESFGPMYTTDDTKYTTGDTVGGGINYATQEFFFTKNGVVVGTVSKDVKSPVFPTVAVHSQGEEVTVNFGKDPFVFDIKAYEAEQRAIQQEKIDCISIPLDAGHGLVRSYLQHYGYEGTLEFFDMASKSTAPPISLVPENGFNEEDNVYAMNRRTLRELIRHGEIDETFAKLRELYPQIVQDDRSSICFLLHTQKFIELVRVGKLEEAVLYGRSEFEKFKRRSEFDDLVKDCAALLAYERPDNSSVGYLLRESQRELVADAVNAIILATNPNVKDPKCCLQSRLERLLRQLTACFLEKRSLNGGDGEAFHLRRILKSGKKG

+ 87 - 0
tests/tripal_chado/loaders/FASTAImporterTest.php

@@ -0,0 +1,87 @@
+<?php
+
+namespace Tests;
+
+use StatonLab\TripalTestSuite\DBTransaction;
+use StatonLab\TripalTestSuite\TripalTestCase;
+
+/**
+ * Tests for the Chado FASTA importer.
+ */
+class FASTAImporterTest extends TripalTestCase {
+
+  // Start a database transaction for each test method and roll it back afterwards.
+  use DBTransaction;
+
+  /**
+   * Ensures parents are linked when no parent regular expression is supplied.
+   *
+   * The test creates an organism and two mRNA features whose names match the
+   * records in the two_prots.fasta fixture, loads that file through the
+   * importer with an empty 're_subject', and then checks that new features
+   * were loaded and that each mRNA is the object of a feature relationship.
+   *
+   * @group fasta
+   * @group chado
+   */
+  public function testImporterAssociatesParentWithoutRegexp() {
+    module_load_include('inc', 'tripal_chado', 'includes/TripalImporter/FASTAImporter');
+    $importer = new \FASTAImporter();
+
+    // Parent records: one organism, one analysis and two mRNA features named
+    // exactly like the sequences in the fixture file.
+    $organism = factory('chado.organism')->create();
+    $mrna_term = chado_get_cvterm(['id' => 'SO:0000234']);
+    $analysis = factory('chado.analysis')->create();
+    $mrna_1 = factory('chado.feature')->create([
+      'type_id' => $mrna_term->cvterm_id,
+      'organism_id' => $organism->organism_id,
+      'name' => 'FRAEX38873_v2_000000010.1',
+      'uniquename' => 'FRAEX38873_v2_000000010.1',
+    ]);
+    $mrna_2 = factory('chado.feature')->create([
+      'type_id' => $mrna_term->cvterm_id,
+      'organism_id' => $organism->organism_id,
+      'name' => 'FRAEX38873_v2_000000010.2',
+      'uniquename' => 'FRAEX38873_v2_000000010.2',
+    ]);
+    $file = ['file_local' => __DIR__ . '/../data/two_prots.fasta'];
+
+    // Every regular expression is left empty on purpose; 're_subject' in
+    // particular is the parent regexp this test exercises the absence of.
+    $run_args = [
+      'analysis_id' => $analysis->analysis_id,
+      'organism_id' => $organism->organism_id,
+      'seqtype' => 'polypeptide',
+      'parent_type' => "mRNA",
+      'rel_type' => "derives_from",
+      'method' => '2',
+      're_name' => "",
+      're_uname' => "",
+      're_accession' => "",
+      'db_id' => "",
+      're_subject' => "",
+      'match_type' => "1",
+    ];
+
+    $importer->create($run_args, $file);
+    $importer->prepareFiles();
+    $importer->run();
+
+    // Only the two mRNAs above exist for this organism before the import, so
+    // a count above two means sequences from the FASTA file were loaded.
+    $features = db_select('chado.feature', 'f')
+      ->fields('f')
+      ->condition('f.organism_id', $organism->organism_id)
+      ->execute()
+      ->fetchAll();
+
+    $this->assertGreaterThan(2, count($features), 'The child features were not loaded when a regexp was not provided.');
+
+    // Each mRNA must be the object of a relationship whose subject is an
+    // existing feature.
+    foreach ([$mrna_1, $mrna_2] as $mrna) {
+      $query = db_select('chado.feature_relationship', 'fr')
+        ->fields('fr')
+        ->condition('fr.object_id', $mrna->feature_id);
+      // join() returns the table alias rather than the query, so it cannot be
+      // part of the fluent chain above.
+      $query->join('chado.feature', 'f', 'f.feature_id = fr.subject_id');
+      $relationship = $query->execute()->fetchObject();
+
+      $this->assertNotFalse($relationship, 'relationship was not added to parent feature when regexp not provided (same parent/child name).');
+    }
+  }
+}

+ 5 - 6
tripal_chado/includes/TripalImporter/FASTAImporter.inc

@@ -220,7 +220,9 @@ class FASTAImporter extends TripalImporter {
       '#required' => FALSE,
       '#description' => t('Enter the regular expression that will extract the unique
                          name needed to identify the existing sequence for which the
-                         relationship type selected above will apply.'),
+                         relationship type selected above will apply.  If no regular
+                         expression is provided, the parent unique name must be the 
+                         same as the loaded feature name.'),
       '#weight' => 6
     );
     $form['additional']['relationship']['parent_type'] = array(
@@ -278,9 +280,7 @@ class FASTAImporter extends TripalImporter {
     }
 
     // make sure if a relationship is specified that all fields are provided.
-    if (($rel_type or $parent_type) and !$re_subject) {
-      form_set_error('re_subject', t("Please provide a regular expression for the parent"));
-    }
+
     if (($rel_type or $re_subject) and !$parent_type) {
       form_set_error('parent_type', t("Please provide a SO term for the parent"));
     }
@@ -322,7 +322,6 @@ class FASTAImporter extends TripalImporter {
    * @see TripalImporter::run()
    */
   public function run() {
-
     $arguments = $this->arguments['run_args'];
     $file_path = $this->arguments['files'][0]['file_path'];
 
@@ -536,7 +535,7 @@ class FASTAImporter extends TripalImporter {
         }
 
         // Get the relationship subject
-         $subject = "";
+         $subject = $uname ? $uname : "";
         if (!empty($re_subject)) {
           preg_match("/$re_subject/", $line, $matches);
           $subject = trim($matches[1]);