GFF3ImporterTest.php 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206
  1. <?php
  2. namespace Tests;
  3. use StatonLab\TripalTestSuite\DBTransaction;
  4. use StatonLab\TripalTestSuite\TripalTestCase;
  /**
   * Integration tests for the Tripal GFF3 importer.
   *
   * Each test creates a throw-away analysis and organism, loads the landmark
   * sequences with the FASTA importer, runs the GFF3 importer, and then looks
   * the expected feature up in chado.feature by uniquename.
   *
   * NOTE(review): two of the three tests download their fixtures from GitHub,
   * so they need network access to run.
   */
  class GFF3ImporterTest extends TripalTestCase {

    use DBTransaction;

    /**
     * Remote GFF3 fixture shared by the tests that do not use a local file.
     */
    const REMOTE_GFF_URL = 'https://raw.githubusercontent.com/statonlab/tripal_dev_seed/master/Fexcel_mini/gff/filtered.gff';

    /**
     * Remote FASTA fixture holding the landmark sequences.
     */
    const REMOTE_LANDMARKS_URL = 'https://raw.githubusercontent.com/statonlab/tripal_dev_seed/master/Fexcel_mini/sequences/empty_landmarks.fasta';

    /**
     * Confirm basic GFF importer functionality.
     *
     * @group gff
     */
    public function testGFFImporter() {
      $gff_file = ['file_remote' => self::REMOTE_GFF_URL];
      $analysis = factory('chado.analysis')->create();
      $organism = factory('chado.organism')->create();
      $run_args = $this->buildGFFRunArgs($analysis, $organism);

      $this->loadLandmarks($analysis, $organism);
      $this->runGFFLoader($run_args, $gff_file);

      $name = 'FRAEX38873_v2_000000110.2.exon4';
      $this->assertEquals($name, $this->findFeatureUniquename($name));
    }

    /**
     * Exercise the skip_protein option.
     *
     * With skip_protein set the polypeptide feature must be absent after the
     * import; after re-running the import with skip_protein cleared it must be
     * present.
     *
     * @group gff
     * @ticket 77
     */
    public function testGFFNoProteinOption() {
      $gff_file = ['file_remote' => self::REMOTE_GFF_URL];
      $analysis = factory('chado.analysis')->create();
      $organism = factory('chado.organism')->create();
      $run_args = $this->buildGFFRunArgs($analysis, $organism, ['skip_protein' => 1]);

      $this->loadLandmarks($analysis, $organism);
      $this->runGFFLoader($run_args, $gff_file);

      $polypeptide_term = tripal_get_cvterm([
        'cv_id' => ['name' => 'sequence'],
        'name' => 'polypeptide',
      ]);
      // Fail with a readable message instead of dereferencing an empty lookup
      // result in the queries below.
      $this->assertNotEmpty($polypeptide_term, 'The sequence:polypeptide cvterm could not be found.');
      $polypeptide_type_id = $polypeptide_term->cvterm_id;

      // NOTE(review): the original author's comment suggests the fixture GFF
      // does not describe proteins explicitly, so this feature would only
      // exist if the importer created it - confirm against the fixture.
      $name = 'FRAEX38873_v2_000000110.1-protein';

      // skip_protein = 1: no polypeptide row may exist.
      $this->assertFalse($this->findFeatureUniquename($name, $polypeptide_type_id));

      // skip_protein = 0: the same import must now produce the polypeptide.
      $run_args['skip_protein'] = 0;
      $this->runGFFLoader($run_args, $gff_file);

      // Compare the fetched field directly so that a missing row is reported
      // as a failed assertion rather than a property access on FALSE.
      $this->assertEquals($name, $this->findFeatureUniquename($name, $polypeptide_type_id));
    }

    /**
     * The GFF importer should still create explicitly defined proteins if
     * skip_protein is true.
     *
     * @group gff
     * @ticket 77
     */
    public function testGFFImporterLoadsExplicitProteins() {
      $gff_file = ['file_local' => __DIR__ . '/../data/simpleGFF.gff'];
      $analysis = factory('chado.analysis')->create();
      $organism = factory('chado.organism')->create();
      $run_args = $this->buildGFFRunArgs($analysis, $organism, ['skip_protein' => 1]);

      $this->loadLandmarks($analysis, $organism);
      $this->runGFFLoader($run_args, $gff_file);

      $name = 'FRAEX38873_v2_000000010.1.3_test_protein';
      $this->assertEquals($name, $this->findFeatureUniquename($name));
    }

    /**
     * Build the argument array handed to the GFF3 importer.
     *
     * @param object $analysis
     *   Record whose analysis_id is passed to the importer.
     * @param object $organism
     *   Record whose organism_id is passed to the importer.
     * @param array $overrides
     *   Extra or replacement arguments (e.g. ['skip_protein' => 1]). These take
     *   precedence over the defaults below.
     *
     * @return array
     *   The importer run arguments.
     */
    private function buildGFFRunArgs($analysis, $organism, array $overrides = []) {
      $defaults = [
        'analysis_id' => $analysis->analysis_id,
        'organism_id' => $organism->organism_id,
        'use_transaction' => 1,
        'add_only' => 0,
        'update' => 1,
        'create_organism' => 0,
        'create_target' => 0,
        // Regexps for mRNA and protein.
        're_mrna' => NULL,
        're_protein' => NULL,
        // Optional.
        'target_organism_id' => NULL,
        'target_type' => NULL,
        'start_line' => NULL,
        'landmark_type' => NULL,
        'alt_id_attr' => NULL,
      ];

      // Array union keeps the left-hand value for duplicate keys, so the
      // overrides win over the defaults.
      return $overrides + $defaults;
    }

    /**
     * Look up a feature's uniquename in chado.feature.
     *
     * @param string $uniquename
     *   The uniquename to search for.
     * @param int|null $type_id
     *   When given, additionally restrict the match to this feature type_id.
     *
     * @return string|false
     *   The stored uniquename, or FALSE when no row matches (per Drupal's
     *   fetchField() contract).
     */
    private function findFeatureUniquename($uniquename, $type_id = NULL) {
      $select = db_select('chado.feature', 'f')
        ->fields('f', ['uniquename'])
        ->condition('f.uniquename', $uniquename);

      if ($type_id !== NULL) {
        $select->condition('f.type_id', $type_id);
      }

      return $select->execute()->fetchField();
    }

    /**
     * Run the GFF3 importer against a file.
     *
     * @param array $run_args
     *   Importer arguments, passed unchanged to GFF3Importer::create().
     * @param array $file
     *   File descriptor, e.g. ['file_remote' => URL] or ['file_local' => path].
     */
    private function runGFFLoader($run_args, $file) {
      module_load_include('inc', 'tripal_chado', 'includes/TripalImporter/GFF3Importer');

      $importer = new \GFF3Importer();
      $importer->create($run_args, $file);
      $importer->prepareFiles();
      $importer->run();
    }

    /**
     * Load the landmark sequences with the FASTA importer.
     *
     * The tests call this before running the GFF3 importer.
     *
     * @param object $analysis
     *   Record whose analysis_id is passed to the importer.
     * @param object $organism
     *   Record whose organism_id is passed to the importer.
     */
    private function loadLandmarks($analysis, $organism) {
      $landmark_file = ['file_remote' => self::REMOTE_LANDMARKS_URL];
      $run_args = [
        'organism_id' => $organism->organism_id,
        'analysis_id' => $analysis->analysis_id,
        'seqtype' => 'supercontig',
        // Default: insert and update.
        'method' => 2,
        // Default: match on unique name.
        'match_type' => 1,
        // Optional.
        're_name' => NULL,
        're_uname' => NULL,
        're_accession' => NULL,
        'db_id' => NULL,
        'rel_type' => NULL,
        're_subject' => NULL,
        'parent_type' => NULL,
      ];

      module_load_include('inc', 'tripal_chado', 'includes/TripalImporter/FASTAImporter');

      $importer = new \FASTAImporter();
      $importer->create($run_args, $landmark_file);
      $importer->prepareFiles();
      $importer->run();
    }

  }