GFF3ImporterTest.php 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. <?php
  2. namespace Tests;
  3. use StatonLab\TripalTestSuite\DBTransaction;
  4. use StatonLab\TripalTestSuite\TripalTestCase;
  /**
   * Integration tests for the Tripal Chado GFF3 importer.
   *
   * Each test creates a throw-away analysis and organism, loads landmark
   * sequences with the FASTA importer, runs the GFF3 importer against a remote
   * fixture file and then inspects the chado.feature table.
   */
  class GFF3ImporterTest extends TripalTestCase {

    // Start a database transaction for every test method and roll it back
    // afterwards.
    use DBTransaction;

    /**
     * Remote GFF3 fixture loaded by both tests.
     */
    const GFF_FILE_URL = 'https://raw.githubusercontent.com/statonlab/tripal_dev_seed/master/Fexcel_mini/gff/filtered.gff';

    /**
     * Remote FASTA fixture holding the landmark sequences.
     */
    const LANDMARK_FILE_URL = 'https://raw.githubusercontent.com/statonlab/tripal_dev_seed/master/Fexcel_mini/sequences/empty_landmarks.fasta';

    /**
     * Confirm GFF loads.
     *
     * @group gff
     */
    public function testGFFImporter() {
      $analysis = factory('chado.analysis')->create();
      $organism = factory('chado.organism')->create();

      $this->loadLandmarks($analysis, $organism);
      $this->runGFFLoader(
        $this->buildGFFRunArgs($analysis, $organism),
        ['file_remote' => self::GFF_FILE_URL]
      );

      // A single known exon from the fixture is enough to prove the load ran.
      $name = 'FRAEX38873_v2_000000110.2.exon4';
      $this->assertEquals($name, $this->fetchFeatureUniquename($name));
    }

    /**
     * Confirm the skip_protein option controls whether the protein is loaded.
     *
     * Loads the GFF once with skip_protein = 1 and expects no polypeptide
     * feature, then again with skip_protein = 0 and expects it to exist.
     *
     * @group gff
     * @group failing
     * @ticket 77
     */
    public function testGFFNoProteinOption() {
      $gff_file = ['file_remote' => self::GFF_FILE_URL];
      $analysis = factory('chado.analysis')->create();
      $organism = factory('chado.organism')->create();

      // skip_protein is the new importer argument under test.
      $run_args = $this->buildGFFRunArgs($analysis, $organism, ['skip_protein' => 1]);

      $this->loadLandmarks($analysis, $organism);
      $this->runGFFLoader($run_args, $gff_file);

      $protein_term = tripal_get_cvterm([
        'cv_id' => ['name' => 'sequence'],
        'name' => 'polypeptide',
      ]);
      // Fail with a readable message instead of dereferencing a missing term.
      $this->assertNotEmpty($protein_term, 'The "polypeptide" term of the "sequence" cv was not found.');

      // NOTE(review): the original author suspected that the fixture GFF does
      // not describe any proteins, which may be why this test is in the
      // "failing" group (ticket 77). Confirm against the fixture.
      $name = 'FRAEX38873_v2_000000110.1-protein';

      // With skip_protein enabled no polypeptide feature must exist.
      $this->assertFalse($this->fetchFeatureUniquename($name, $protein_term->cvterm_id));

      // Re-run with proteins enabled: the polypeptide feature must now exist.
      $run_args['skip_protein'] = 0;
      $this->runGFFLoader($run_args, $gff_file);

      // fetchFeatureUniquename() yields FALSE when the row is missing, so a
      // missing protein is reported as a failed assertion rather than as a
      // property access on a non-object.
      $this->assertEquals($name, $this->fetchFeatureUniquename($name, $protein_term->cvterm_id));
    }

    /**
     * Builds the argument array handed to the GFF3 importer.
     *
     * @param object $analysis
     *   Record whose analysis_id is passed to the importer.
     * @param object $organism
     *   Record whose organism_id is passed to the importer.
     * @param array $overrides
     *   Extra or replacement arguments; these win over the defaults.
     *
     * @return array
     *   The run arguments, with the overrides listed first.
     */
    private function buildGFFRunArgs($analysis, $organism, array $overrides = []) {
      $defaults = [
        'analysis_id' => $analysis->analysis_id,
        'organism_id' => $organism->organism_id,
        'use_transaction' => 1,
        'add_only' => 0,
        'update' => 1,
        'create_organism' => 0,
        'create_target' => 0,
        // Regular expressions for mRNA and protein.
        're_mrna' => NULL,
        're_protein' => NULL,
        // Optional.
        'target_organism_id' => NULL,
        'target_type' => NULL,
        'start_line' => NULL,
        'landmark_type' => NULL,
        'alt_id_attr' => NULL,
      ];

      // Array union keeps the left-hand value for keys present in both.
      return $overrides + $defaults;
    }

    /**
     * Looks up a feature's uniquename in chado.feature.
     *
     * @param string $uniquename
     *   The uniquename to search for.
     * @param int|null $type_id
     *   When given, additionally restrict the match to this type_id.
     *
     * @return string|false
     *   The stored uniquename, or FALSE when no row matches.
     */
    private function fetchFeatureUniquename($uniquename, $type_id = NULL) {
      $select = db_select('chado.feature', 'f')
        ->fields('f', ['uniquename'])
        ->condition('f.uniquename', $uniquename);

      if ($type_id !== NULL) {
        $select->condition('f.type_id', $type_id);
      }

      return $select->execute()->fetchField();
    }

    /**
     * Runs the GFF3 importer with the given arguments and file.
     *
     * @param array $run_args
     *   Importer arguments, see buildGFFRunArgs().
     * @param array $file
     *   File descriptor array, e.g. ['file_remote' => <url>].
     */
    private function runGFFLoader($run_args, $file) {
      // The importer class lives in an include file of the tripal_chado module.
      module_load_include('inc', 'tripal_chado', 'includes/TripalImporter/GFF3Importer');

      $importer = new \GFF3Importer();
      $importer->create($run_args, $file);
      $importer->prepareFiles();
      $importer->run();
    }

    /**
     * Loads the landmark sequences with the FASTA importer.
     *
     * Called before each GFF load in this class.
     *
     * @param object $analysis
     *   Record whose analysis_id is passed to the importer.
     * @param object $organism
     *   Record whose organism_id is passed to the importer.
     */
    private function loadLandmarks($analysis, $organism) {
      $landmark_file = ['file_remote' => self::LANDMARK_FILE_URL];

      $run_args = [
        'organism_id' => $organism->organism_id,
        'analysis_id' => $analysis->analysis_id,
        'seqtype' => 'scaffold',
        // Default: insert and update.
        'method' => 2,
        // Default: match on unique name.
        'match_type' => 1,
        // Optional.
        're_name' => NULL,
        're_uname' => NULL,
        're_accession' => NULL,
        'db_id' => NULL,
        'rel_type' => NULL,
        're_subject' => NULL,
        'parent_type' => NULL,
      ];

      module_load_include('inc', 'tripal_chado', 'includes/TripalImporter/FASTAImporter');

      $importer = new \FASTAImporter();
      $importer->create($run_args, $landmark_file);
      $importer->prepareFiles();
      $importer->run();
    }

  }