__DIR__ . '/../data/small_gene.gff'];
    $fasta = ['file_local' => __DIR__ . '/../data/short_scaffold.fasta'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // optional
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism, $fasta);
    $this->runGFFLoader($run_args, $gff_file);

    // This protein is an explicit protein / polypeptide imported from the GFF
    // file.
    $name = 'test_protein_001.1';
    $query = db_select('chado.feature', 'f')
      ->fields('f', ['uniquename'])
      ->condition('f.uniquename', $name)
      ->execute()
      ->fetchField();
    $this->assertEquals($name, $query);
  }

  /**
   * Run the GFF loader on gff_tag_unescaped_character.gff for testing.
   *
   * This tests how the GFF loader handles a tag name that contains a comma.
   * The GFF loader should allow it: the import must finish without throwing.
   */
  public function testGFFImporterUnescapedTagWithComma() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_tag_unescaped_character.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // optional
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism);

    // The comma in the tag name must NOT make the loader throw.
    $hasException = false;
    try {
      $this->runGFFLoader($run_args, $gff_file);
    }
    catch (\Exception $ex) {
      $hasException = true;
    }
    $this->assertFalse($hasException);
  }

  /**
   * Run the GFF loader on small_gene.gff for testing.
   *
   * This tests whether the GFF loader adds Alias attributes: exactly one
   * feature_synonym record must point at the synonym 'first_test_gene'.
   */
  public function testGFFImporterTagAliasVerification() {
    $gff_file = ['file_local' => __DIR__ . '/../data/small_gene.gff'];
    $fasta = ['file_local' => __DIR__ . '/../data/short_scaffold.fasta'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // optional
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism, $fasta);
    $this->runGFFLoader($run_args, $gff_file);

    // feature_synonym links to synonym through its synonym_id column; joining
    // on feature_synonym_id (the link table's own primary key) would only
    // match when the two serial values happen to coincide.
    $results = db_query(
      "SELECT * FROM chado.feature_synonym fs
        INNER JOIN chado.synonym s ON (fs.synonym_id = s.synonym_id)
        WHERE s.name = :name",
      [':name' => 'first_test_gene']
    );
    $this->assertEquals(1, $results->rowCount());
  }

  /**
   * Run the GFF loader on gff_tag_parent_verification.gff for testing.
   *
   * This tests whether the GFF loader adds Parent attributes: one
   * feature_relationship must have the parent feature as its object.
   */
  public function testGFFImporterTagParentVerification() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_tag_parent_verification.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // optional
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism);
    $this->runGFFLoader($run_args, $gff_file);

    $results = db_query("SELECT * FROM chado.feature_relationship fr LEFT JOIN chado.feature f ON (fr.object_id = f.feature_id) WHERE f.uniquename = 'FRAEX38873_v2_000000010' ;", array());

    // Found parent via object_id FRAEX38873_v2_000000010
    $this->assertEquals(1, $results->rowCount());
  }

  /**
   * Run the GFF loader on gff_tagvalue_encoded_character.gff for testing.
   *
   * This tests whether the GFF loader adds IDs that contain a comma when the
   * tag value uses an encoded character (as the data file name suggests). The
   * feature must be stored as 'FRAEX38873_v2_000000010,20'.
   */
  public function testGFFImporterEscapedTagValueWithEncodedCharacter() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_tagvalue_encoded_character.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // optional
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism);
    $this->runGFFLoader($run_args, $gff_file);

    $results = db_query("SELECT * FROM chado.feature WHERE uniquename = 'FRAEX38873_v2_000000010,20';", array());
    $this->assertEquals(1, $results->rowCount());
  }

  /**
   * Run the GFF loader on gff_tagvalue_unescaped_character.gff for testing.
   *
   * This tests whether the GFF loader adds IDs that contain a comma.
   * The GFF loader should allow it: the import must finish without throwing.
   */
  public function testGFFImporterUnescapedTagValueWithComma() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_tagvalue_unescaped_character.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // optional
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism);

    // The unescaped comma must NOT make the loader throw.
    $hasException = false;
    try {
      $this->runGFFLoader($run_args, $gff_file);
    }
    catch (\Exception $ex) {
      $hasException = true;
    }
    $this->assertFalse($hasException);
  }

  /**
   * Run the GFF loader on gff_seqid_invalid_character.gff for testing.
   *
   * Seqids seem to also be called landmarks within GFF loader.
   * This tests whether the GFF loader has any issues with characters like
   * single quotes. The loader is expected to throw an exception.
   */
  public function testGFFImporterSeqidWithInvalidCharacter() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_seqid_invalid_character.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // optional
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism);

    // This will produce an exception due to quote character in Seqid
    $hasException = false;
    try {
      $this->runGFFLoader($run_args, $gff_file);
    }
    catch (\Exception $ex) {
      $hasException = true;
    }
    $this->assertTrue($hasException);
  }

  /**
   * Run the GFF loader on gff_unescaped_ids.gff for testing.
   *
   * This tests whether the GFF loader adds IDs that contain whitespaces.
   * The GFF loader should allow it
   */
  public function testGFFImporterUnescapedWhitespaceID() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_unescaped_ids.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // optional
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism);

    // This should go through just fine
    $this->runGFFLoader($run_args, $gff_file);

    $results = db_query("SELECT * FROM chado.feature WHERE uniquename = 'FRAEX38873_v2_000000010 SPACED';");
    $this->assertEquals(1, $results->rowCount());
  }

  /**
   * Run the GFF loader on gff_rightarrow_id.gff for testing.
*
   * This tests whether the GFF loader fails if ID contains
   * arrow >. It should not fail.
   */
  public function testGFFImporterRightArrowID() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_rightarrow_id.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // optional
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism);

    // The right arrow in the ID must not stop the import.
    $this->runGFFLoader($run_args, $gff_file);

    $results = db_query("SELECT * FROM chado.feature WHERE uniquename = '>FRAEX38873_v2_000000010';");

    // We expect this record to get inserted.
    $this->assertEquals(1, $results->rowCount());
  }

  /**
   * Run the GFF loader on gff_duplicate_ids.gff for testing.
   *
   * This tests whether the GFF loader detects duplicate IDs which makes a
   * GFF file invalid since IDs should be unique. The GFF loader should throw
   * an exception which this test checks for.
   */
  public function testGFFImporterDuplicateIDsExceptionCheck() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_duplicate_ids.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // optional
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];

    // Load the landmarks outside of the try block: a failure here is a setup
    // problem and must not be mistaken for the expected duplicate-ID error.
    $this->loadLandmarks($analysis, $organism);

    $hasException = false;
    try {
      // This will produce an exception of duplicate feature ID
      $this->runGFFLoader($run_args, $gff_file);
    }
    catch (\Exception $ex) {
      $hasException = true;
    }

    // We expect an exception to happen so we are looking for a return of true
    $this->assertTrue($hasException);
  }

  /**
   * Run the GFF loader on gff_invalidstartend.gff for testing.
   *
   * This tests whether the GFF loader fixes start end values
   */
  public function testGFFImporterInvalidStartEnd() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_invalidstartend.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // optional
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism);

    // No exception is expected here.
    $this->runGFFLoader($run_args, $gff_file);

    $results = db_select('chado.feature', 'f')
      ->fields('f', ['uniquename'])
      ->condition('f.uniquename', 'FRAEX38873_v2_000000010')
      ->execute()
      ->fetchAll();

    // We expect the feature to still be added to the database
    // since the GFF Loader caters for reversing backward numbers
    $this->assertCount(1, $results);
  }

  /**
   * Run the GFF loader on gff_score.gff for testing.
   *
   * This tests whether the GFF loader interprets the score values
   */
  public function testGFFImporterScoreTest() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_score.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // optional
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism);
    $this->runGFFLoader($run_args, $gff_file);

    // Each query must find a row. Asserting inside a loop over the result set
    // would silently pass when the score was never stored.
    $expected_scores = [
      // Test that integer values get placed in the db
      'SELECT * FROM chado.analysisfeature WHERE significance = 2 LIMIT 1' => 2,
      // Test that decimal/float values get placed in the db
      'SELECT * FROM chado.analysisfeature WHERE significance = 2.5 LIMIT 1' => 2.5,
      // Test that negative score values get placed in the db
      'SELECT * FROM chado.analysisfeature WHERE significance = -2.5 LIMIT 1' => -2.5,
    ];
    foreach ($expected_scores as $sql => $score) {
      $row = db_query($sql, array())->fetchObject();
      $this->assertNotEmpty($row, "No analysisfeature record found with significance $score.");
      $this->assertEquals($score, $row->significance);
    }
  }

  /**
   * Run the GFF loader on gff_strand_invalid.gff for testing.
   *
   * This tests that the GFF loader throws an exception for this file.
   */
  public function testGFFImporterInvalidStrandTest() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_strand_invalid.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // optional
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism);

    $isException = false;
    try {
      $this->runGFFLoader($run_args, $gff_file);
    }
    catch (\Exception $ex) {
      $isException = true;
    }
    $this->assertTrue($isException);
  }

  /**
   * Run the GFF loader on gff_strand.gff for testing.
   *
   * This tests whether the GFF loader interprets the strand values
   */
  public function testGFFImporterStrandTest() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_strand.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // optional
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism);
    $this->runGFFLoader($run_args, $gff_file);

    // Test that integer values for strand that get placed in the db
    // Strand data gets saved in chado.featureloc
    $expected_strands = [
      // +
      'FRAEX38873_v2_000000010' => 1,
      // -
      'FRAEX38873_v2_000000010.1' => -1,
      // ?
      'FRAEX38873_v2_000000010.2' => 0,
      // .
      'FRAEX38873_v2_000000010.3' => 0,
    ];
    foreach ($expected_strands as $uniquename => $strand) {
      $row = db_query(
        'SELECT * FROM chado.featureloc fl LEFT JOIN chado.feature f ON (fl.feature_id = f.feature_id) WHERE uniquename = :uniquename LIMIT 1',
        array(':uniquename' => $uniquename)
      )->fetchObject();

      // A missing featureloc must fail the test instead of being skipped.
      $this->assertNotEmpty($row, "No featureloc record found for feature $uniquename.");
      $this->assertEquals($strand, $row->strand);
    }
  }

  /**
   * Run the GFF loader on gff_phase.gff for testing.
   *
   * This tests whether the GFF loader interprets the phase values correctly
   * for CDS rows.
   */
  public function testGFFImporterPhaseTest() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_phase.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // optional
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism);
    $this->runGFFLoader($run_args, $gff_file);

    $results = db_query(
      "SELECT * FROM chado.feature WHERE uniquename = :uniquename LIMIT 1",
      array(':uniquename' => 'FRAEX38873_v2_000000010.1.cds1')
    );

    // Check to make sure it returns a single row (implying a match)
    // by the uniquename specified
    $this->assertEquals(1, $results->rowCount());

    $results = db_query("SELECT * FROM chado.featureloc WHERE phase = 1 LIMIT 1", array());

    // Check to make sure it returns a single row (implying a match)
    // by phase value 1
    $this->assertEquals(1, $results->rowCount());
  }

  /**
   * Run the GFF loader on gff_phase_invalid_number.gff for testing.
*
   * This tests whether the GFF loader interprets the phase values correctly
   * for CDS rows when a number outside of the range 0,1,2 is specified.
   */
  public function testGFFImporterInvalidPhaseNumberTest() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_phase_invalid_number.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // optional
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism);

    $hasException = false;
    try {
      $this->runGFFLoader($run_args, $gff_file);
    }
    catch (\Exception $ex) {
      $hasException = true;
    }

    // An exception should have been thrown since the phase number is invalid
    $this->assertTrue($hasException);
  }

  /**
   * Run the GFF loader on gff_phase_invalid_character.gff for testing.
   *
   * This tests whether the GFF loader interprets the phase values correctly
   * for CDS rows when a character outside of the range 0,1,2 is specified.
   */
  public function testGFFImporterInvalidPhaseCharacterTest() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_phase_invalid_character.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // optional
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism);

    $hasException = false;
    try {
      $this->runGFFLoader($run_args, $gff_file);
    }
    catch (\Exception $ex) {
      $hasException = true;
    }

    // An exception should have been thrown since the phase character is
    // invalid
    $this->assertTrue($hasException);
  }

  /**
   * Run the GFF loader on small_gene.gff for testing.
   *
   * This gff has many attributes that we would like to test in the
   * testGFFImporterAttribute*() methods. Besides loading the data, this
   * stores the organism, the analysis, the gene / mRNA / supercontig cvterm
   * ids and the unique names used by those tests on $this.
   */
  private function initGFFImporterAttributes() {
    $gff = ['file_local' => __DIR__ . '/../data/small_gene.gff'];
    $fasta = ['file_local' => __DIR__ . '/../data/short_scaffold.fasta'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // optional
      'target_organism_id' => $organism->organism_id,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism, $fasta);
    $this->runGFFLoader($run_args, $gff);

    $this->organism = $organism;
    $this->analysis = $analysis;

    // Type ids of the sequence ontology terms the attribute tests filter on.
    $this->gene_cvt = chado_get_cvterm([
      'name' => 'gene',
      'cv_id' => ['name' => 'sequence'],
    ])->cvterm_id;
    $this->mrna_cvt = chado_get_cvterm([
      'name' => 'mRNA',
      'cv_id' => ['name' => 'sequence'],
    ])->cvterm_id;
    $this->supercontig_cvt = chado_get_cvterm([
      'name' => 'supercontig',
      'cv_id' => ['name' => 'sequence'],
    ])->cvterm_id;

    $this->gene_1_uname = 'test_gene_001';
    $this->gene_2_uname = 'test_gene_002';
    $this->scaffold_1_uname = 'scaffold1';
  }

  /**
   * Ensures that the feature record is loaded correctly into chado.
   *
   * @group gff
   */
  public function testGFFImporterAttributeFeature() {
    $this->initGFFImporterAttributes();
    $organism = $this->organism;

    $gene_1 = db_select('chado.feature', 'f')
      ->fields('f')
      ->condition('uniquename', $this->gene_1_uname)
      ->condition('type_id', $this->gene_cvt)
      ->execute()
      ->fetchObject();

    // fetchObject() returns FALSE when the gene was not loaded; fail with a
    // clear message instead of a property access on a non-object.
    $this->assertNotEmpty($gene_1, 'The gene feature was not found in chado.feature.');
    $this->assertEquals('test_gene_001', $gene_1->uniquename);
    $this->assertEquals('test_gene_001', $gene_1->name);
    $this->assertEquals($organism->organism_id, $gene_1->organism_id);
    $this->assertEquals($this->gene_cvt, $gene_1->type_id);
  }

  /**
   * Ensures the feature alias is loaded correctly into chado.
*
   * @group gff
   */
  public function testGFFImporterAttributeAlias() {
    $this->initGFFImporterAttributes();
    $expected_alias = 'first_test_gene';

    // Look up the first test gene that the importer created.
    $gene_lookup = db_select('chado.feature', 'f');
    $gene_lookup->fields('f');
    $gene_lookup->condition('uniquename', $this->gene_1_uname);
    $gene_lookup->condition('type_id', $this->gene_cvt);
    $gene = $gene_lookup->execute()->fetchObject();

    // Fetch the synonym attached to that gene.
    $synonym_lookup = db_select('chado.feature_synonym', 'fs');
    $synonym_lookup->join('chado.synonym', 's', 's.synonym_id = fs.synonym_id');
    $synonym_lookup->fields('s');
    $synonym_lookup->condition('fs.feature_id', $gene->feature_id);
    $synonym = $synonym_lookup->execute()->fetchObject();

    $this->assertEquals($expected_alias, $synonym->name);
  }

  /**
   * Ensures that the dbxref records are loaded correctly into chado.
   *
   * @group gff
   */
  public function testGFFImporterAttributeDbxref() {
    $this->initGFFImporterAttributes();
    $expected_accession = 'test_gene_dbx_001';
    $test_db = chado_get_db(['name' => 'TEST_DB']);
    $gff_db = chado_get_db(['name' => 'GFF_source']);

    // Look up the first test gene that the importer created.
    $gene_lookup = db_select('chado.feature', 'f');
    $gene_lookup->fields('f');
    $gene_lookup->condition('uniquename', $this->gene_1_uname);
    $gene_lookup->condition('type_id', $this->gene_cvt);
    $gene = $gene_lookup->execute()->fetchObject();

    // Shared part of both dbxref lookups: every dbxref linked to the gene.
    $test_db_lookup = db_select('chado.feature_dbxref', 'fdbx');
    $test_db_lookup->join('chado.dbxref', 'dbx', 'dbx.dbxref_id = fdbx.dbxref_id');
    $test_db_lookup->fields('dbx');
    $test_db_lookup->condition('fdbx.feature_id', $gene->feature_id);

    // Copy the shared query before narrowing each copy to one database.
    $gff_db_lookup = clone $test_db_lookup;
    $test_db_lookup->condition('dbx.db_id', $test_db->db_id);
    $gff_db_lookup->condition('dbx.db_id', $gff_db->db_id);

    $test_db_statement = $test_db_lookup->execute();
    $gff_db_statement = $gff_db_lookup->execute();
    $test_dbxref = $test_db_statement->fetchObject();
    $gff_dbxref = $gff_db_statement->fetchObject();

    $this->assertEquals($expected_accession, $test_dbxref->accession);
    $this->assertEquals($this->gene_1_uname, $gff_dbxref->accession);
  }

  /**
   * Ensures ontology term records loaded correctly into chado.
*
   * @group gff
   */
  public function testGFFImporterAttributeOntology() {
    $this->initGFFImporterAttributes();

    // Look up the first test gene that the importer created.
    $gene_lookup = db_select('chado.feature', 'f');
    $gene_lookup->fields('f');
    $gene_lookup->condition('uniquename', $this->gene_1_uname);
    $gene_lookup->condition('type_id', $this->gene_cvt);
    $gene = $gene_lookup->execute()->fetchObject();

    // Resolve the ontology term by its database name and accession.
    $term_identifier = [
      'dbxref_id' => [
        'accession' => '0000704',
        'db_id' => ['name' => 'SO'],
      ],
    ];
    $term = chado_get_cvterm($term_identifier);

    // Exactly one feature_cvterm record should link the gene to the term.
    $link_lookup = db_select('chado.feature_cvterm', 'fcvt');
    $link_lookup->fields('fcvt');
    $link_lookup->condition('cvterm_id', $term->cvterm_id);
    $link_lookup->condition('feature_id', $gene->feature_id);
    $links = $link_lookup->execute();

    $this->assertEquals(1, $links->rowCount());
  }

  /**
   * Ensures feature parent record loaded correctly into chado.
   *
   * @group gff
   */
  public function testGFFImporterAttributeParent() {
    $this->initGFFImporterAttributes();
    $child_uname = 'test_mrna_001.1';

    $part_of = chado_get_cvterm([
      'name' => 'part_of',
      'cv_id' => ['name' => 'sequence'],
    ]);
    $part_of_id = $part_of->cvterm_id;

    // The mRNA whose parent is being verified.
    $child_lookup = db_select('chado.feature', 'f');
    $child_lookup->fields('f');
    $child_lookup->condition('uniquename', $child_uname);
    $child_lookup->condition('type_id', $this->mrna_cvt);
    $child = $child_lookup->execute()->fetchObject();

    // Follow the part_of relationship from the mRNA to its object feature.
    $parent_lookup = db_select('chado.feature_relationship', 'fr');
    $parent_lookup->join('chado.feature', 'f', 'f.feature_id = fr.object_id');
    $parent_lookup->fields('f');
    $parent_lookup->condition('fr.subject_id', $child->feature_id);
    $parent_lookup->condition('fr.type_id', $part_of_id);
    $parent_feature = $parent_lookup->execute()->fetchObject();

    $this->assertEquals('test_gene_001', $parent_feature->uniquename);
    $this->assertEquals('test_gene_001', $parent_feature->name);
    $this->assertEquals($this->gene_cvt, $parent_feature->type_id);
    $this->assertEquals($this->organism->organism_id, $parent_feature->organism_id);
  }

  /**
   * Ensure target record loaded correctly into chado.
*
   * @group gff
   */
  public function testGFFImporterAttributeTarget() {
    $this->initGFFImporterAttributes();
    $expected_fmin = 99;
    $expected_fmax = 200;

    $landmark_type = chado_get_cvterm([
      'name' => 'supercontig',
      'cv_id' => ['name' => 'sequence'],
    ]);
    $landmark_type_id = $landmark_type->cvterm_id;

    // The scaffold on which the gene is located.
    $landmark_lookup = db_select('chado.feature', 'f');
    $landmark_lookup->fields('f');
    $landmark_lookup->condition('uniquename', 'scaffold1');
    $landmark_lookup->condition('type_id', $landmark_type_id);
    $landmark = $landmark_lookup->execute()->fetchObject();

    // The first test gene that the importer created.
    $gene_lookup = db_select('chado.feature', 'f');
    $gene_lookup->fields('f');
    $gene_lookup->condition('uniquename', $this->gene_1_uname);
    $gene_lookup->condition('type_id', $this->gene_cvt);
    $gene = $gene_lookup->execute()->fetchObject();

    // The location of the gene on the scaffold.
    $location_lookup = db_select('chado.featureloc', 'fl');
    $location_lookup->fields('fl');
    $location_lookup->condition('fl.feature_id', $gene->feature_id);
    $location_lookup->condition('fl.srcfeature_id', $landmark->feature_id);
    $location = $location_lookup->execute()->fetchObject();

    $this->assertEquals($expected_fmin, $location->fmin);
    $this->assertEquals($expected_fmax, $location->fmax);
  }

  /**
   * Ensure properties loaded correctly into chado.
*
   * @group gff
   */
  public function testGFFImporterAttributeProperty() {
    $this->initGFFImporterAttributes();
    $gap_1 = 'test_gap_1';
    $note_val = 'test_gene_001_note';

    $gene_1 = db_select('chado.feature', 'f')
      ->fields('f')
      ->condition('uniquename', $this->gene_1_uname)
      ->condition('type_id', $this->gene_cvt)
      ->execute()
      ->fetchObject();

    $gap_cvt = chado_get_cvterm([
      'name' => 'Gap',
      'cv_id' => ['name' => 'feature_property'],
    ])->cvterm_id;
    $note_cvt = chado_get_cvterm([
      'name' => 'Note',
      'cv_id' => ['name' => 'feature_property'],
    ])->cvterm_id;

    // Assert gaps loaded correctly
    $gaps_query = db_select('chado.featureprop', 'fp')
      ->fields('fp')
      ->condition('feature_id', $gene_1->feature_id)
      ->condition('type_id', $gap_cvt)
      ->execute();

    // Index the gap properties by value. The array is initialised up front so
    // that an empty result fails the assertion below instead of raising an
    // undefined variable notice.
    $gaps = [];
    foreach ($gaps_query as $gap) {
      $gaps[$gap->value] = $gap;
    }
    $this->assertArrayHasKey($gap_1, $gaps);
    $this->assertEquals($gap_1, $gaps[$gap_1]->value);
    $this->assertEquals(0, $gaps[$gap_1]->rank);

    // Assert note loaded correctly
    $note = db_select('chado.featureprop', 'fp')
      ->fields('fp')
      ->condition('feature_id', $gene_1->feature_id)
      ->condition('type_id', $note_cvt)
      ->execute()
      ->fetchObject();
    $this->assertNotEmpty($note, 'The Note property was not found for the gene.');
    $this->assertEquals($note_val, $note->value);
    $this->assertEquals(0, $note->rank);
  }

  /**
   * Ensure derives from information loaded correctly into chado.
*
   * @group gff
   */
  public function testGFFImporterAttributeDerivesFrom() {
    $this->initGFFImporterAttributes();

    $gene_2 = db_select('chado.feature', 'f')
      ->fields('f')
      ->condition('uniquename', $this->gene_2_uname)
      ->condition('type_id', $this->gene_cvt)
      ->execute()
      ->fetchObject();

    $derivesfrom_cvt = chado_get_cvterm([
      'name' => 'derives_from',
      'cv_id' => ['name' => 'sequence'],
    ])->cvterm_id;

    // Follow the derives_from relationship from gene 2 to its object feature.
    $query = db_select('chado.feature', 'f');
    $query->join('chado.feature_relationship', 'fr', 'f.feature_id = fr.object_id');
    $query->fields('f');
    $query->condition('fr.subject_id', $gene_2->feature_id);
    $query->condition('fr.type_id', $derivesfrom_cvt);
    $derivesfrom_feature = $query->execute()->fetchObject();

    // fetchObject() returns FALSE when no relationship was created.
    $this->assertNotEmpty($derivesfrom_feature, 'No derives_from relationship was found for the second gene.');
    $this->assertEquals($this->gene_1_uname, $derivesfrom_feature->uniquename);
    $this->assertEquals($this->gene_1_uname, $derivesfrom_feature->name);
    $this->assertEquals($this->gene_cvt, $derivesfrom_feature->type_id);
  }

  /**
   * Ensure FASTA information loaded correctly into chado.
   *
   * @group gff
   */
  public function testGFFImporterAttributeFastas() {
    $this->initGFFImporterAttributes();

    $scaffold = db_select('chado.feature', 'f')
      ->fields('f')
      ->condition('uniquename', $this->scaffold_1_uname)
      ->condition('type_id', $this->supercontig_cvt)
      ->execute()
      ->fetchObject();

    $this->assertNotEmpty($scaffold, 'The scaffold feature was not found in chado.feature.');
    $this->assertEquals(720, $scaffold->seqlen);
    $this->assertEquals(720, strlen($scaffold->residues));
    $this->assertEquals('83578d8afdaec399c682aa6c0ddd29c9', $scaffold->md5checksum);
  }

  /**
   * Test that when checked, explicit proteins are created when specified within
   * the GFF file. Explicit proteins will not respect the skip_protein argument
   * and will therefore be added to the database.
   *
   * @group gff
   * @ticket 77
   */
  public function testGFFPolypeptide() {
    $gff_file = ['file_local' => __DIR__ . '/../data/simpleGFF.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      // The new argument
      'skip_protein' => 1,
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // optional
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism);
    $this->runGFFLoader($run_args, $gff_file);

    // chado_get_cvterm() is used here like everywhere else in this class.
    $protein_term = chado_get_cvterm([
      'cv_id' => ['name' => 'sequence'],
      'name' => 'polypeptide',
    ]);

    $name = 'FRAEX38873_v2_000000010.1.3_test_protein';
    $query = db_select('chado.feature', 'f')
      ->fields('f', ['uniquename'])
      ->condition('f.uniquename', $name)
      ->condition('f.type_id', $protein_term->cvterm_id)
      ->execute()
      ->fetchAll();
    $this->assertCount(1, $query);
  }

  /**
   * Add a skip protein option. Test that when checked, implicit proteins are
   * not created, but that they are created when unchecked.
   *
   * @group gff
   * @ticket 77
   */
  public function testGFFNoProteinOption() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_protein_generation.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      // Skip protein feature generation
      'skip_protein' => 1,
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // optional
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism);
    $this->runGFFLoader($run_args, $gff_file);

    // chado_get_cvterm() is used here like everywhere else in this class.
    $protein_term = chado_get_cvterm([
      'cv_id' => ['name' => 'sequence'],
      'name' => 'polypeptide',
    ]);

    $name = "FRAEX38873_v2_000000190.1-protein";
    $results = db_select('chado.feature', 'f')
      ->fields('f', ['uniquename'])
      ->condition('f.uniquename', $name)
      ->condition('f.type_id', $protein_term->cvterm_id)
      ->execute()
      ->fetchAll();

    // There should be no proteins since we used the skip_proteins flag and no
    // explicit proteins were specified in the test file
    $this->assertCount(0, $results);

    // Now perform a unit test where we do not skip proteins generation
    $run_args['skip_protein'] = 0;
    $this->runGFFLoader($run_args, $gff_file);

    $protein = db_select('chado.feature', 'f')
      ->fields('f', ['uniquename'])
      ->condition('f.uniquename', $name)
      ->condition('f.type_id', $protein_term->cvterm_id)
      ->execute()
      ->fetchObject();

    // fetchObject() returns FALSE when the protein was not generated.
    $this->assertNotEmpty($protein, 'The implicit protein was not created.');
    $this->assertEquals($name, $protein->uniquename);
  }

  /**
   * The GFF importer should still create explicitly defined proteins if
   * skip_protein is true.
   *
   * @group gff
   * @ticket 77
   */
  public function testGFFImporterLoadsExplicitProteins() {
    $gff_file = ['file_local' => __DIR__ . '/../data/simpleGFF.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      // The new argument
      'skip_protein' => 1,
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // optional
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism);
    $this->runGFFLoader($run_args, $gff_file);

    $name = 'FRAEX38873_v2_000000010.1.3_test_protein';
    $query = db_select('chado.feature', 'f')
      ->fields('f', ['uniquename'])
      ->condition('f.uniquename', $name)
      ->execute()
      ->fetchField();
    $this->assertEquals($name, $query);
  }

  /**
   * Runs the GFF3 importer with the given arguments on the given file.
   *
   * @param array $run_args
   *   The importer arguments passed to GFF3Importer::create().
   * @param array $file
   *   The file description passed to GFF3Importer::create(); the tests in
   *   this class use ['file_local' => path].
   */
  private function runGFFLoader($run_args, $file) {
    module_load_include('inc', 'tripal_chado', 'includes/TripalImporter/GFF3Importer');
    $importer = new \GFF3Importer();
    $importer->create($run_args, $file);
    $importer->prepareFiles();
    $importer->run();
  }

  /**
   * Loads landmark features with the FASTA importer.
   *
   * @param object $analysis
   *   The analysis record; its analysis_id is passed to the importer.
   * @param object $organism
   *   The organism record; its organism_id is passed to the importer.
   * @param array $landmark_file
   *   The FASTA file description. When empty, empty_landmarks.fasta from the
   *   test data directory is used.
   */
  private function loadLandmarks($analysis, $organism, $landmark_file = array()) {
    if (empty($landmark_file)) {
      $landmark_file = ['file_local' => __DIR__ . '/../data/empty_landmarks.fasta'];
    }

    $run_args = [
      'organism_id' => $organism->organism_id,
      'analysis_id' => $analysis->analysis_id,
      'seqtype' => 'supercontig',
      // default insert and update
      'method' => 2,
      // unique name default
      'match_type' => 1,
      // optional
      're_name' => NULL,
      're_uname' => NULL,
      're_accession' => NULL,
      'db_id' => NULL,
      'rel_type' => NULL,
      're_subject' => NULL,
      'parent_type' => NULL,
    ];

    module_load_include('inc', 'tripal_chado', 'includes/TripalImporter/FASTAImporter');
    $importer = new \FASTAImporter();
    $importer->create($run_args, $landmark_file);
    $importer->prepareFiles();
    $importer->run();
  }

}