GFF3ImporterTest.php 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  1. <?php
  2. namespace Tests;
  3. use StatonLab\TripalTestSuite\DBTransaction;
  4. use StatonLab\TripalTestSuite\TripalTestCase;
  /**
   * Tests for the Tripal Chado GFF3 importer (\GFF3Importer).
   *
   * Every test creates a fresh analysis and organism through the test
   * factories, loads landmark features with \FASTAImporter, runs the GFF3
   * importer, and then looks for the expected rows in chado.feature.
   *
   * NOTE(review): two of the input files are referenced through 'file_remote'
   * URLs, so these tests presumably need network access when the importer
   * prepares its files -- confirm before running them in an offline CI job.
   */
  class GFF3ImporterTest extends TripalTestCase {

    use DBTransaction;

    /**
     * Remote GFF3 file shared by the basic and the skip-protein tests.
     */
    const GFF_REMOTE_URL = 'https://raw.githubusercontent.com/statonlab/tripal_dev_seed/master/Fexcel_mini/gff/filtered.gff';

    /**
     * Remote FASTA file holding the landmark sequences the GFF files refer to.
     */
    const LANDMARK_FASTA_URL = 'https://raw.githubusercontent.com/statonlab/tripal_dev_seed/master/Fexcel_mini/sequences/empty_landmarks.fasta';

    /**
     * Confirm basic GFF importer functionality.
     *
     * @group gff
     */
    public function testGFFImporter() {
      $gff_file = ['file_remote' => self::GFF_REMOTE_URL];
      $analysis = factory('chado.analysis')->create();
      $organism = factory('chado.organism')->create();
      $run_args = $this->buildGFFRunArgs($analysis, $organism);

      $this->loadLandmarks($analysis, $organism);
      $this->runGFFLoader($run_args, $gff_file);

      // One exon from the file is enough to show that features were loaded.
      $name = 'FRAEX38873_v2_000000110.2.exon4';
      $this->assertEquals($name, $this->findFeatureUniquename($name));
    }

    /**
     * Add a skip protein option. Test that when checked, implicit proteins are
     * not created, but that they are created when unchecked.
     *
     * @group gff
     * @ticket 77
     */
    public function testGFFNoProteinOption() {
      $gff_file = ['file_remote' => self::GFF_REMOTE_URL];
      $analysis = factory('chado.analysis')->create();
      $organism = factory('chado.organism')->create();
      // 'skip_protein' is the option under test; start with it switched on.
      $run_args = $this->buildGFFRunArgs($analysis, $organism, ['skip_protein' => 1]);

      $this->loadLandmarks($analysis, $organism);
      $this->runGFFLoader($run_args, $gff_file);

      $protein_type = tripal_get_cvterm([
        'cv_id' => ['name' => 'sequence'],
        'name' => 'polypeptide',
      ]);
      // Fail with a readable message instead of a "property of non-object"
      // error when the term is not available in the test database.
      $this->assertNotEmpty($protein_type, 'The sequence:polypeptide cvterm could not be found.');

      // Uniquename of the protein feature that must only appear once
      // skip_protein is switched off.
      $name = 'FRAEX38873_v2_000000110.1-protein';

      $this->assertFalse(
        $this->findFeatureUniquename($name, $protein_type->cvterm_id),
        'A protein feature was created even though skip_protein was set.'
      );

      // Re-run the same import with the option off: the protein must now exist.
      $run_args['skip_protein'] = 0;
      $this->runGFFLoader($run_args, $gff_file);

      // fetchField() based lookup: a missing row yields FALSE, so a regression
      // shows up as a failed assertion rather than a PHP error.
      $this->assertEquals(
        $name,
        $this->findFeatureUniquename($name, $protein_type->cvterm_id),
        'The protein feature was not created with skip_protein switched off.'
      );
    }

    /**
     * The GFF importer should still create explicitly defined proteins if
     * skip_protein is true.
     *
     * @group gff
     * @ticket 77
     */
    public function testGFFImporterLoadsExplicitProteins() {
      $gff_file = ['file_local' => __DIR__ . '/../data/simpleGFF.gff'];
      $analysis = factory('chado.analysis')->create();
      $organism = factory('chado.organism')->create();
      $run_args = $this->buildGFFRunArgs($analysis, $organism, ['skip_protein' => 1]);

      $this->loadLandmarks($analysis, $organism);
      $this->runGFFLoader($run_args, $gff_file);

      $name = 'FRAEX38873_v2_000000010.1.3_test_protein';
      $this->assertEquals($name, $this->findFeatureUniquename($name));
    }

    /**
     * Builds the argument array handed to the GFF3 importer.
     *
     * @param object $analysis
     *   Analysis record; only its analysis_id is read.
     * @param object $organism
     *   Organism record; only its organism_id is read.
     * @param array $overrides
     *   Extra or replacement arguments (for example ['skip_protein' => 1]).
     *   They take precedence over the defaults and come first in the result.
     *
     * @return array
     *   The run arguments for \GFF3Importer::create().
     */
    private function buildGFFRunArgs($analysis, $organism, array $overrides = []) {
      // Array union keeps the left-hand value for duplicate keys, so anything
      // passed in $overrides wins over the defaults below.
      return $overrides + [
        'analysis_id' => $analysis->analysis_id,
        'organism_id' => $organism->organism_id,
        'use_transaction' => 1,
        'add_only' => 0,
        'update' => 1,
        'create_organism' => 0,
        'create_target' => 0,
        // Regexps for mRNA and protein.
        're_mrna' => NULL,
        're_protein' => NULL,
        // Optional.
        'target_organism_id' => NULL,
        'target_type' => NULL,
        'start_line' => NULL,
        'landmark_type' => NULL,
        'alt_id_attr' => NULL,
      ];
    }

    /**
     * Looks up a feature in chado.feature by its uniquename.
     *
     * @param string $uniquename
     *   The uniquename to search for.
     * @param int|null $type_id
     *   Optional cvterm_id the feature's type_id must match as well.
     *
     * @return mixed
     *   Whatever fetchField() yields for the uniquename column: the stored
     *   uniquename when a row matches, FALSE otherwise.
     */
    private function findFeatureUniquename($uniquename, $type_id = NULL) {
      $select = db_select('chado.feature', 'f')
        ->fields('f', ['uniquename'])
        ->condition('f.uniquename', $uniquename);

      if ($type_id !== NULL) {
        $select->condition('f.type_id', $type_id);
      }

      return $select->execute()->fetchField();
    }

    /**
     * Drives an importer through its create / prepareFiles / run sequence.
     *
     * @param object $importer
     *   An importer instance offering create(), prepareFiles() and run().
     * @param array $run_args
     *   Arguments passed to create().
     * @param array $file
     *   File descriptor passed to create(); the callers in this class use a
     *   'file_remote' or 'file_local' key.
     */
    private function executeImporter($importer, $run_args, $file) {
      $importer->create($run_args, $file);
      $importer->prepareFiles();
      $importer->run();
    }

    /**
     * Runs the GFF3 importer with the given arguments and input file.
     *
     * @param array $run_args
     *   Importer arguments, see buildGFFRunArgs().
     * @param array $file
     *   File descriptor holding a 'file_remote' or 'file_local' key.
     */
    private function runGFFLoader($run_args, $file) {
      module_load_include('inc', 'tripal_chado', 'includes/TripalImporter/GFF3Importer');
      $this->executeImporter(new \GFF3Importer(), $run_args, $file);
    }

    /**
     * Loads the landmark sequences with the FASTA importer.
     *
     * Every test calls this before running the GFF3 importer.
     *
     * @param object $analysis
     *   Analysis record; only its analysis_id is read.
     * @param object $organism
     *   Organism record; only its organism_id is read.
     */
    private function loadLandmarks($analysis, $organism) {
      $landmark_file = ['file_remote' => self::LANDMARK_FASTA_URL];
      $run_args = [
        'organism_id' => $organism->organism_id,
        'analysis_id' => $analysis->analysis_id,
        'seqtype' => 'scaffold',
        // Default: insert and update.
        'method' => 2,
        // Default: match on unique name.
        'match_type' => 1,
        // Optional.
        're_name' => NULL,
        're_uname' => NULL,
        're_accession' => NULL,
        'db_id' => NULL,
        'rel_type' => NULL,
        're_subject' => NULL,
        'parent_type' => NULL,
      ];

      module_load_include('inc', 'tripal_chado', 'includes/TripalImporter/FASTAImporter');
      $this->executeImporter(new \FASTAImporter(), $run_args, $landmark_file);
    }

  }