|
@@ -0,0 +1,205 @@
|
|
|
+<?php
|
|
|
+
|
|
|
+namespace Tests;
|
|
|
+
|
|
|
+use StatonLab\TripalTestSuite\DBTransaction;
|
|
|
+use StatonLab\TripalTestSuite\TripalTestCase;
|
|
|
+
|
|
|
+class GFF3ImporterTest extends TripalTestCase {
|
|
|
+
|
|
|
+ use DBTransaction;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Confirm basic GFF importer functionality.
|
|
|
+ *
|
|
|
+ * @group gff
|
|
|
+ */
|
|
|
+ public function testGFFImporter() {
|
|
|
+ $gff_file = ['file_remote' => 'https://raw.githubusercontent.com/statonlab/tripal_dev_seed/master/Fexcel_mini/gff/filtered.gff'];
|
|
|
+ $analysis = factory('chado.analysis')->create();
|
|
|
+ $organism = factory('chado.organism')->create();
|
|
|
+ $run_args = [
|
|
|
+ 'analysis_id' => $analysis->analysis_id,
|
|
|
+ 'organism_id' => $organism->organism_id,
|
|
|
+ 'use_transaction' => 1,
|
|
|
+ 'add_only' => 0,
|
|
|
+ 'update' => 1,
|
|
|
+ 'create_organism' => 0,
|
|
|
+ 'create_target' => 0,
|
|
|
+ ///regexps for mRNA and protein.
|
|
|
+ 're_mrna' => NULL,
|
|
|
+ 're_protein' => NULL,
|
|
|
+ //optional
|
|
|
+ 'target_organism_id' => NULL,
|
|
|
+ 'target_type' => NULL,
|
|
|
+ 'start_line' => NULL,
|
|
|
+ 'landmark_type' => NULL,
|
|
|
+ 'alt_id_attr' => NULL,
|
|
|
+ ];
|
|
|
+ $this->loadLandmarks($analysis, $organism);
|
|
|
+ $this->runGFFLoader($run_args, $gff_file);
|
|
|
+
|
|
|
+ $name = 'FRAEX38873_v2_000000110.2.exon4';
|
|
|
+ $query = db_select('chado.feature', 'f')
|
|
|
+ ->fields('f', ['uniquename'])
|
|
|
+ ->condition('f.uniquename', $name)
|
|
|
+ ->execute()
|
|
|
+ ->fetchField();
|
|
|
+ $this->assertEquals($name, $query);
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Add a skip protein option. Test that when checked, implicit proteins are
|
|
|
+ * not created, but that they are created when unchecked.
|
|
|
+ *
|
|
|
+ * @group gff
|
|
|
+ * @ticket 77
|
|
|
+ *
|
|
|
+ */
|
|
|
+ public function testGFFNoProteinOption() {
|
|
|
+
|
|
|
+ $gff_file = ['file_remote' => 'https://raw.githubusercontent.com/statonlab/tripal_dev_seed/master/Fexcel_mini/gff/filtered.gff'];
|
|
|
+ $analysis = factory('chado.analysis')->create();
|
|
|
+ $organism = factory('chado.organism')->create();
|
|
|
+ $run_args = [
|
|
|
+ //The new argument
|
|
|
+ 'skip_protein' => 1,
|
|
|
+ ///
|
|
|
+ 'analysis_id' => $analysis->analysis_id,
|
|
|
+ 'organism_id' => $organism->organism_id,
|
|
|
+ 'use_transaction' => 1,
|
|
|
+ 'add_only' => 0,
|
|
|
+ 'update' => 1,
|
|
|
+ 'create_organism' => 0,
|
|
|
+ 'create_target' => 0,
|
|
|
+ ///regexps for mRNA and protein.
|
|
|
+ 're_mrna' => NULL,
|
|
|
+ 're_protein' => NULL,
|
|
|
+ //optional
|
|
|
+ 'target_organism_id' => NULL,
|
|
|
+ 'target_type' => NULL,
|
|
|
+ 'start_line' => NULL,
|
|
|
+ 'landmark_type' => NULL,
|
|
|
+ 'alt_id_attr' => NULL,
|
|
|
+ ];
|
|
|
+ $this->loadLandmarks($analysis, $organism);
|
|
|
+
|
|
|
+ $this->runGFFLoader($run_args, $gff_file);
|
|
|
+
|
|
|
+
|
|
|
+ $identifier = [
|
|
|
+ 'cv_id' => ['name' => 'sequence'],
|
|
|
+ 'name' => 'polypeptide',
|
|
|
+ ];
|
|
|
+ $protein_type_id = tripal_get_cvterm($identifier);
|
|
|
+
|
|
|
+ //This works i think i just dont have proteins described in the GFF.
|
|
|
+
|
|
|
+ $name = 'FRAEX38873_v2_000000110.1-protein';
|
|
|
+ $query = db_select('chado.feature', 'f')
|
|
|
+ ->fields('f', ['uniquename'])
|
|
|
+ ->condition('f.uniquename', $name)
|
|
|
+ ->condition('f.type_id', $protein_type_id->cvterm_id)
|
|
|
+ ->execute()
|
|
|
+ ->fetchField();
|
|
|
+ $this->assertFalse($query);
|
|
|
+
|
|
|
+ $run_args['skip_protein'] = 0;
|
|
|
+
|
|
|
+ $this->runGFFLoader($run_args, $gff_file);
|
|
|
+
|
|
|
+ $query = db_select('chado.feature', 'f')
|
|
|
+ ->fields('f', ['uniquename'])
|
|
|
+ ->condition('f.uniquename', $name)
|
|
|
+ ->condition('f.type_id', $protein_type_id->cvterm_id)
|
|
|
+ ->execute()
|
|
|
+ ->fetchObject();
|
|
|
+ $this->assertEquals($name, $query->uniquename);
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * The GFF importer should still create explicitly defined proteins if
|
|
|
+ * skip_protein is true.
|
|
|
+ *
|
|
|
+ * @ticket 77
|
|
|
+ */
|
|
|
+ public function testGFFImporterLoadsExplicitProteins() {
|
|
|
+
|
|
|
+ $gff_file = ['file_local' => __DIR__ . '/../data/simpleGFF.gff'];
|
|
|
+ $analysis = factory('chado.analysis')->create();
|
|
|
+ $organism = factory('chado.organism')->create();
|
|
|
+ $run_args = [
|
|
|
+ //The new argument
|
|
|
+ 'skip_protein' => 1,
|
|
|
+ ///
|
|
|
+ 'analysis_id' => $analysis->analysis_id,
|
|
|
+ 'organism_id' => $organism->organism_id,
|
|
|
+ 'use_transaction' => 1,
|
|
|
+ 'add_only' => 0,
|
|
|
+ 'update' => 1,
|
|
|
+ 'create_organism' => 0,
|
|
|
+ 'create_target' => 0,
|
|
|
+ ///regexps for mRNA and protein.
|
|
|
+ 're_mrna' => NULL,
|
|
|
+ 're_protein' => NULL,
|
|
|
+ //optional
|
|
|
+ 'target_organism_id' => NULL,
|
|
|
+ 'target_type' => NULL,
|
|
|
+ 'start_line' => NULL,
|
|
|
+ 'landmark_type' => NULL,
|
|
|
+ 'alt_id_attr' => NULL,
|
|
|
+ ];
|
|
|
+ $this->loadLandmarks($analysis, $organism);
|
|
|
+
|
|
|
+ $this->runGFFLoader($run_args, $gff_file);
|
|
|
+
|
|
|
+ $name = 'FRAEX38873_v2_000000010.1.3_test_protein';
|
|
|
+ $query = db_select('chado.feature', 'f')
|
|
|
+ ->fields('f', ['uniquename'])
|
|
|
+ ->condition('f.uniquename', $name)
|
|
|
+ ->execute()
|
|
|
+ ->fetchField();
|
|
|
+ $this->assertEquals($name, $query);
|
|
|
+ }
|
|
|
+
|
|
|
+ private function runGFFLoader($run_args, $file) {
|
|
|
+ // silent(function ($run_args, $file) {
|
|
|
+ module_load_include('inc', 'tripal_chado', 'includes/TripalImporter/GFF3Importer');
|
|
|
+ $importer = new \GFF3Importer();
|
|
|
+ $importer->create($run_args, $file);
|
|
|
+ $importer->prepareFiles();
|
|
|
+ $importer->run();
|
|
|
+ // });
|
|
|
+ }
|
|
|
+
|
|
|
+ private function loadLandmarks($analysis, $organism) {
|
|
|
+ $landmark_file = ['file_remote' => 'https://raw.githubusercontent.com/statonlab/tripal_dev_seed/master/Fexcel_mini/sequences/empty_landmarks.fasta'];
|
|
|
+
|
|
|
+ $run_args = [
|
|
|
+ 'organism_id' => $organism->organism_id,
|
|
|
+ 'analysis_id' => $analysis->analysis_id,
|
|
|
+ 'seqtype' => 'scaffold',
|
|
|
+ 'method' => 2, //default insert and update
|
|
|
+ 'match_type' => 1, //unique name default
|
|
|
+ //optional
|
|
|
+ 're_name' => NULL,
|
|
|
+ 're_uname' => NULL,
|
|
|
+ 're_accession' => NULL,
|
|
|
+ 'db_id' => NULL,
|
|
|
+ 'rel_type' => NULL,
|
|
|
+ 're_subject' => NULL,
|
|
|
+ 'parent_type' => NULL,
|
|
|
+ ];
|
|
|
+ module_load_include('inc', 'tripal_chado', 'includes/TripalImporter/FASTAImporter');
|
|
|
+ //silent(function ($run_args, $landmark_file) {
|
|
|
+ $importer = new \FASTAImporter();
|
|
|
+ $importer->create($run_args, $landmark_file);
|
|
|
+ $importer->prepareFiles();
|
|
|
+ $importer->run();
|
|
|
+ // });
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+}
|