GFF3ImporterTest.php 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371
  1. <?php
  2. namespace Tests;
  3. use StatonLab\TripalTestSuite\DBTransaction;
  4. use StatonLab\TripalTestSuite\TripalTestCase;
  5. class GFF3ImporterTest extends TripalTestCase {
  6. use DBTransaction;
  7. /**
  8. * Confirm basic GFF importer functionality.
  9. *
  10. * @group gff
  11. */
  public function testGFFImporter() {
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();

    // Landmark sequences and the annotation file that refers to them.
    $landmark_fasta = ['file_local' => __DIR__ . '/../data/short_scaffold.fasta'];
    $annotation_gff = ['file_local' => __DIR__ . '/../data/small_gene.gff'];

    // Importer settings: the mRNA/protein regexps and every optional setting
    // are passed as NULL.
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
    ] + array_fill_keys([
      're_mrna',
      're_protein',
      'target_organism_id',
      'target_type',
      'start_line',
      'landmark_type',
      'alt_id_attr',
    ], NULL);

    $this->loadLandmarks($analysis, $organism, $landmark_fasta);
    $this->runGFFLoader($run_args, $annotation_gff);

    // This protein is an explicit protein / polypeptide imported from the GFF
    // file, so it must be retrievable by its uniquename.
    $expected_uniquename = 'test_protein_001.1';
    $select = db_select('chado.feature', 'f');
    $select->fields('f', ['uniquename']);
    $select->condition('f.uniquename', $expected_uniquename);
    $found_uniquename = $select->execute()->fetchField();

    $this->assertEquals($expected_uniquename, $found_uniquename);
  }
  47. /**
  48. * Run the GFF loader on gff_tag_unescaped_character.gff for testing.
  49. *
  50. * This tests whether the GFF loader rejects a tag name that contains an
  51. * unescaped comma. The GFF loader should throw an exception.
  52. */
  public function testGFFImporterUnescapedTagWithComma() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_tag_unescaped_character.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // Regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // Optional settings.
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];

    // Landmarks are loaded outside the try block so that only a failure of the
    // GFF import itself can satisfy the assertion below.
    $this->loadLandmarks($analysis, $organism);

    // The import is expected to throw because a tag name contains a comma.
    $hasException = FALSE;
    try {
      $this->runGFFLoader($run_args, $gff_file);
    }
    catch (\Exception $ex) {
      $hasException = TRUE;
    }

    $this->assertTrue($hasException, 'The GFF loader did not throw an exception for a tag name containing a comma.');
  }
  86. /**
  87. * Run the GFF loader on small_gene.gff for testing.
  88. *
  89. * This tests whether the GFF loader adds Alias attributes
  90. * The GFF loader should allow it
  91. */
  public function testGFFImporterTagAliasVerification() {
    $gff_file = ['file_local' => __DIR__ . '/../data/small_gene.gff'];
    $fasta = ['file_local' => __DIR__ . '/../data/short_scaffold.fasta'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // Regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // Optional settings.
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism, $fasta);
    $this->runGFFLoader($run_args, $gff_file);

    // A feature_synonym row points at its synonym through the synonym_id
    // column, so that is the join column. Joining on the link table's own
    // primary key (feature_synonym_id) only matches when the two unrelated
    // ids happen to be equal.
    $results = db_query("SELECT fs.feature_synonym_id
      FROM chado.feature_synonym fs
      INNER JOIN chado.synonym s ON (fs.synonym_id = s.synonym_id)
      WHERE s.name = 'first_test_gene'");

    $this->assertEquals(1, $results->rowCount(), 'Expected exactly one feature to carry the alias first_test_gene.');
  }
  124. /**
  125. * Run the GFF loader on gff_tag_parent_verification.gff for testing.
  126. *
  127. * This tests whether the GFF loader adds Parent attributes
  128. * The GFF loader should allow it
  129. */
  public function testGFFImporterTagParentVerification() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_tag_parent_verification.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // Regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // Optional settings.
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism);
    $this->runGFFLoader($run_args, $gff_file);

    // The parent is the object of the relationship, so look for a
    // feature_relationship row whose object_id is FRAEX38873_v2_000000010.
    $results = db_query("SELECT * FROM chado.feature_relationship fr
      LEFT JOIN chado.feature f ON (fr.object_id = f.feature_id)
      WHERE f.uniquename = 'FRAEX38873_v2_000000010'");

    // Expected value first, so a failure reports the two counts the right way
    // round.
    $this->assertEquals(1, $results->rowCount(), 'Expected exactly one relationship with FRAEX38873_v2_000000010 as its parent.');
  }
  162. /**
  163. * Run the GFF loader on gff_tagvalue_encoded_character.gff for testing.
  164. *
  165. * This tests whether the GFF loader adds IDs whose value contains an encoded
  166. * character (here a comma). The GFF loader should allow it.
  167. */
  public function testGFFImporterEscapedTagValueWithEncodedCharacter() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_tagvalue_encoded_character.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // Regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // Optional settings.
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism);
    $this->runGFFLoader($run_args, $gff_file);

    // The feature must be stored under a uniquename containing a literal
    // comma.
    $results = db_query("SELECT * FROM chado.feature
      WHERE uniquename = 'FRAEX38873_v2_000000010,20'");

    // Expected value first, so a failure reports the two counts the right way
    // round.
    $this->assertEquals(1, $results->rowCount(), 'Expected one feature named FRAEX38873_v2_000000010,20.');
  }
  197. /**
  198. * Run the GFF loader on gff_tagvalue_unescaped_character.gff for testing.
  199. *
  200. * This tests whether the GFF loader rejects a tag value that contains an
  201. * unescaped comma. The GFF loader should throw an exception.
  202. */
  public function testGFFImporterUnescapedTagValueWithComma() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_tagvalue_unescaped_character.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // Regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // Optional settings.
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];

    // Landmarks are loaded outside the try block so that only a failure of the
    // GFF import itself can satisfy the assertion below.
    $this->loadLandmarks($analysis, $organism);

    // The import is expected to throw because a tag value contains an
    // unescaped comma.
    $hasException = FALSE;
    try {
      $this->runGFFLoader($run_args, $gff_file);
    }
    catch (\Exception $ex) {
      $hasException = TRUE;
    }

    $this->assertTrue($hasException, 'The GFF loader did not throw an exception for a tag value containing an unescaped comma.');
  }
  236. /**
  237. * Run the GFF loader on gff_seqid_invalid_character.gff for testing.
  238. * Seqids seem to also be called landmarks within GFF loader.
  239. * This tests whether the GFF loader has any issues with characters like
  240. * single quotes.
  241. */
  public function testGFFImporterSeqidWithInvalidCharacter() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_seqid_invalid_character.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // Regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // Optional settings.
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];

    // Landmarks are loaded outside the try block so that only a failure of the
    // GFF import itself can satisfy the assertion below.
    $this->loadLandmarks($analysis, $organism);

    // The import is expected to throw because of a quote character in the
    // seqid.
    $hasException = FALSE;
    try {
      $this->runGFFLoader($run_args, $gff_file);
    }
    catch (\Exception $ex) {
      $hasException = TRUE;
    }

    $this->assertTrue($hasException, 'The GFF loader did not throw an exception for a seqid containing an invalid character.');
  }
  276. /**
  277. * Run the GFF loader on gff_unescaped_ids.gff for testing.
  278. *
  279. * This tests whether the GFF loader adds IDs that contain whitespaces.
  280. * The GFF loader should allow it
  281. */
  public function testGFFImporterUnescapedWhitespaceID() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_unescaped_ids.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // Regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // Optional settings.
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism);

    // The import should go through without an exception.
    $this->runGFFLoader($run_args, $gff_file);

    // The ID containing a space must have been stored verbatim.
    $results = db_query(
      'SELECT * FROM chado.feature WHERE uniquename = :uniquename',
      [':uniquename' => 'FRAEX38873_v2_000000010 SPACED']
    );

    // Expected value first, so a failure reports the two counts the right way
    // round.
    $this->assertEquals(1, $results->rowCount(), 'Expected one feature whose uniquename contains a space.');
  }
  311. /**
  312. * Run the GFF loader on gff_rightarrow_id.gff for testing.
  313. *
  314. * This tests whether the GFF loader fails if ID contains
  315. * arrow >. It should not fail.
  316. */
  public function testGFFImporterRightArrowID() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_rightarrow_id.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // Regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // Optional settings.
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism);

    // A right arrow in the ID must not make the import fail.
    $this->runGFFLoader($run_args, $gff_file);

    $results = db_query(
      'SELECT * FROM chado.feature WHERE uniquename = :uniquename',
      [':uniquename' => '>FRAEX38873_v2_000000010']
    );

    // We expect this record to get inserted. Expected value first, so a
    // failure reports the two counts the right way round.
    $this->assertEquals(1, $results->rowCount(), 'Expected one feature whose uniquename starts with a right arrow.');
  }
  347. /**
  348. * Run the GFF loader on gff_duplicate_ids.gff for testing.
  349. *
  350. * This tests whether the GFF loader detects duplicate IDs, which make a
  351. * GFF file invalid since IDs should be unique. The GFF loader should throw
  352. * an exception, which this test checks for.
  353. */
  public function testGFFImporterDuplicateIDsExceptionCheck() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_duplicate_ids.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // Regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // Optional settings.
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];

    // Load the landmarks outside the try block. If this setup step threw, the
    // catch below would otherwise record it as the expected failure and the
    // test would pass without the GFF file ever being imported.
    $this->loadLandmarks($analysis, $organism);

    // The import is expected to throw because of the duplicate feature ID.
    $hasException = FALSE;
    try {
      $this->runGFFLoader($run_args, $gff_file);
    }
    catch (\Exception $ex) {
      $hasException = TRUE;
    }

    $this->assertTrue($hasException, 'The GFF loader did not throw an exception for a file with duplicate IDs.');
  }
  388. /**
  389. * Run the GFF loader on gff_invalidstartend.gff for testing.
  390. *
  391. * This tests whether the GFF loader fixes start end values
  392. */
  public function testGFFImporterInvalidStartEnd() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_invalidstartend.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // Regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // Optional settings.
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism);

    // The import itself is expected to succeed.
    $this->runGFFLoader($run_args, $gff_file);

    $results = db_select('chado.feature', 'f')
      ->fields('f', ['uniquename'])
      ->condition('f.uniquename', 'FRAEX38873_v2_000000010')
      ->execute()
      ->fetchAll();

    // We expect the feature to still be added to the database
    // since the GFF Loader caters for reversing backward numbers.
    $this->assertCount(1, $results, 'Expected the feature with reversed start/end values to be imported.');
  }
  427. /**
  428. * Run the GFF loader on gff_score.gff for testing.
  429. *
  430. * This tests whether the GFF loader interprets the score values
  431. */
  public function testGFFImporterScoreTest() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_score.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // Regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // Optional settings.
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism);
    $this->runGFFLoader($run_args, $gff_file);

    // An integer, a decimal and a negative score must each be found as the
    // significance of an analysisfeature row. The lookup is limited to the
    // analysis created for this test so unrelated rows cannot satisfy it, and
    // a missing row fails the test instead of silently skipping the check.
    // Scores are bound as strings so the database does the numeric conversion.
    // NOTE(review): assumes the importer records analysisfeature rows under
    // the analysis_id given in $run_args - confirm against the importer.
    foreach (['2', '2.5', '-2.5'] as $score) {
      $significance = db_query(
        'SELECT significance FROM chado.analysisfeature
          WHERE analysis_id = :analysis_id AND significance = :score LIMIT 1',
        [
          ':analysis_id' => $analysis->analysis_id,
          ':score' => $score,
        ]
      )->fetchField();

      // fetchField() yields FALSE when the query matched no row.
      $this->assertNotSame(FALSE, $significance, "No analysisfeature record was stored with score $score.");
      $this->assertEquals((float) $score, (float) $significance);
    }
  }
  475. /**
  476. * Run the GFF loader on gff_strand_invalid.gff for testing.
  477. *
  478. * This tests whether the GFF loader throws an exception for an invalid strand value.
  479. */
  public function testGFFImporterInvalidStrandTest() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_strand_invalid.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // Regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // Optional settings.
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];

    // Landmarks are loaded outside the try block so that only a failure of the
    // GFF import itself can satisfy the assertion below.
    $this->loadLandmarks($analysis, $organism);

    // The import is expected to throw for the invalid strand file.
    $isException = FALSE;
    try {
      $this->runGFFLoader($run_args, $gff_file);
    }
    catch (\Exception $ex) {
      $isException = TRUE;
    }

    $this->assertTrue($isException, 'The GFF loader did not throw an exception for an invalid strand value.');
  }
  512. /**
  513. * Run the GFF loader on gff_strand.gff for testing.
  514. *
  515. * This tests whether the GFF loader interprets the strand values
  516. */
  public function testGFFImporterStrandTest() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_strand.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // Regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // Optional settings.
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism);
    $this->runGFFLoader($run_args, $gff_file);

    // Strand data gets saved in chado.featureloc. Map each feature to the
    // integer its strand symbol should be stored as.
    $expected_strands = [
      // Strand "+".
      'FRAEX38873_v2_000000010' => 1,
      // Strand "-".
      'FRAEX38873_v2_000000010.1' => -1,
      // Strand "?".
      'FRAEX38873_v2_000000010.2' => 0,
      // Strand ".".
      'FRAEX38873_v2_000000010.3' => 0,
    ];

    // Lookups are limited to features of the organism created for this test,
    // and a missing featureloc row fails the test instead of silently skipping
    // the strand check.
    // NOTE(review): assumes imported features are stored under the
    // organism_id given in $run_args - confirm against the importer.
    foreach ($expected_strands as $uniquename => $expected_strand) {
      $row = db_query(
        'SELECT fl.strand FROM chado.featureloc fl
          INNER JOIN chado.feature f ON (fl.feature_id = f.feature_id)
          WHERE f.uniquename = :uniquename AND f.organism_id = :organism_id
          LIMIT 1',
        [
          ':uniquename' => $uniquename,
          ':organism_id' => $organism->organism_id,
        ]
      )->fetchObject();

      // fetchObject() yields FALSE when the query matched no row.
      $this->assertNotSame(FALSE, $row, "No featureloc record was found for $uniquename.");
      $this->assertEquals($expected_strand, $row->strand, "Unexpected strand stored for $uniquename.");
    }

    // This GFF should create 5 featureloc records for this organism's
    // features.
    $featureloc_count = db_query(
      'SELECT COUNT(*) FROM chado.featureloc fl
        INNER JOIN chado.feature f ON (fl.feature_id = f.feature_id)
        WHERE f.organism_id = :organism_id',
      [':organism_id' => $organism->organism_id]
    )->fetchField();
    $this->assertEquals(5, $featureloc_count, 'Expected 5 featureloc records to be created by the import.');
  }
  589. /**
  590. * Run the GFF loader on gff_phase.gff for testing.
  591. *
  592. * This tests whether the GFF loader interprets the phase values correctly
  593. * for CDS rows.
  594. */
  public function testGFFImporterPhaseTest() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_phase.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // Regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // Optional settings.
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism);
    $this->runGFFLoader($run_args, $gff_file);

    // Check that a single row comes back (implying a match) for the
    // uniquename specified. Expected value first, so a failure reports the
    // two counts the right way round.
    $results = db_query(
      'SELECT * FROM chado.feature WHERE uniquename = :uniquename LIMIT 1',
      [':uniquename' => 'FRAEX38873_v2_000000010.1.cds1']
    );
    $this->assertEquals(1, $results->rowCount(), 'Expected the feature FRAEX38873_v2_000000010.1.cds1 to be imported.');

    // Check that a single row comes back (implying a match) for phase
    // value 1.
    $results = db_query('SELECT * FROM chado.featureloc WHERE phase = 1 LIMIT 1');
    $this->assertEquals(1, $results->rowCount(), 'Expected a featureloc record with phase 1.');
  }
  635. /**
  636. * Run the GFF loader on gff_phase_invalid_number.gff for testing.
  637. *
  638. * This tests whether the GFF loader interprets the phase values correctly
  639. * for CDS rows when a number outside of the range 0,1,2 is specified.
  640. */
  public function testGFFImporterInvalidPhaseNumberTest() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_phase_invalid_number.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // Regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // Optional settings.
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];

    // Landmarks are loaded outside the try block so that only a failure of the
    // GFF import itself can satisfy the assertion below.
    $this->loadLandmarks($analysis, $organism);

    $hasException = FALSE;
    try {
      $this->runGFFLoader($run_args, $gff_file);
    }
    catch (\Exception $ex) {
      $hasException = TRUE;
    }

    // An exception should have been thrown since the phase number is invalid.
    $this->assertTrue($hasException, 'The GFF loader did not throw an exception for an invalid phase number.');
  }
  674. /**
  675. * Run the GFF loader on gff_phase_invalid_character.gff for testing.
  676. *
  677. * This tests whether the GFF loader interprets the phase values correctly
  678. * for CDS rows when a character outside of the range 0,1,2 is specified.
  679. */
  public function testGFFImporterInvalidPhaseCharacterTest() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_phase_invalid_character.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // Regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // Optional settings.
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];

    // Landmarks are loaded outside the try block so that only a failure of the
    // GFF import itself can satisfy the assertion below.
    $this->loadLandmarks($analysis, $organism);

    $hasException = FALSE;
    try {
      $this->runGFFLoader($run_args, $gff_file);
    }
    catch (\Exception $ex) {
      $hasException = TRUE;
    }

    // An exception should have been thrown since the phase character is
    // invalid.
    $this->assertTrue($hasException, 'The GFF loader did not throw an exception for an invalid phase character.');
  }
  713. /**
  714. * Run the GFF loader on small_gene.gff for testing.
  715. *
  716. * This gff has many attributes that we would like to test in the
  717. * testGFFImporterAttribute*() methods.
  718. */
  private function initGFFImporterAttributes() {
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();

    // Landmark sequences and the annotation file that refers to them.
    $fasta = ['file_local' => __DIR__ . '/../data/short_scaffold.fasta'];
    $gff = ['file_local' => __DIR__ . '/../data/small_gene.gff'];

    // Importer settings: the mRNA/protein regexps and the optional settings
    // start out as NULL; only the target organism is filled in afterwards.
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
    ] + array_fill_keys([
      're_mrna',
      're_protein',
      'target_organism_id',
      'target_type',
      'start_line',
      'landmark_type',
      'alt_id_attr',
    ], NULL);
    $run_args['target_organism_id'] = $organism->organism_id;

    $this->loadLandmarks($analysis, $organism, $fasta);
    $this->runGFFLoader($run_args, $gff);

    // Keep the fixtures on the test case so the attribute tests can use them.
    $this->organism = $organism;
    $this->analysis = $analysis;

    // Looks up the cvterm_id of a term in the "sequence" vocabulary.
    $sequence_term_id = function ($term_name) {
      $term = chado_get_cvterm([
        'name' => $term_name,
        'cv_id' => [
          'name' => 'sequence',
        ],
      ]);
      return $term->cvterm_id;
    };
    $this->gene_cvt = $sequence_term_id('gene');
    $this->mrna_cvt = $sequence_term_id('mRNA');
    $this->supercontig_cvt = $sequence_term_id('supercontig');

    // Unique names the attribute tests look features up by.
    $this->gene_1_uname = 'test_gene_001';
    $this->gene_2_uname = 'test_gene_002';
    $this->scaffold_1_uname = 'scaffold1';
  }
  /**
   * Checks the chado feature row created for the first gene.
   *
   * @group gff
   */
  public function testGFFImporterAttributeFeature() {
    $this->initGFFImporterAttributes();

    // Fetch the gene by unique name and type in a single chained call.
    $feature = db_select('chado.feature', 'f')
      ->fields('f')
      ->condition('uniquename', $this->gene_1_uname)
      ->condition('type_id', $this->gene_cvt)
      ->execute()
      ->fetchObject();

    $this->assertEquals('test_gene_001', $feature->uniquename);
    $this->assertEquals('test_gene_001', $feature->name);
    $this->assertEquals($this->organism->organism_id, $feature->organism_id);
    $this->assertEquals($this->gene_cvt, $feature->type_id);
  }
  /**
   * Checks that the alias of the first gene is stored as a synonym in chado.
   *
   * @group gff
   */
  public function testGFFImporterAttributeAlias() {
    $this->initGFFImporterAttributes();

    $gene = db_select('chado.feature', 'f')
      ->fields('f')
      ->condition('uniquename', $this->gene_1_uname)
      ->condition('type_id', $this->gene_cvt)
      ->execute()
      ->fetchObject();

    // Follow feature_synonym to the synonym row linked to the gene.
    $synonym_select = db_select('chado.feature_synonym', 'fs');
    $synonym_select->join('chado.synonym', 's', 's.synonym_id = fs.synonym_id');
    $synonym_select
      ->fields('s')
      ->condition('fs.feature_id', $gene->feature_id);
    $synonym = $synonym_select->execute()->fetchObject();

    $this->assertEquals('first_test_gene', $synonym->name);
  }
  /**
   * Checks the dbxref rows linked to the first gene.
   *
   * Two cross references are inspected: the one in the TEST_DB database and
   * the one in the GFF_source database.
   *
   * @group gff
   */
  public function testGFFImporterAttributeDbxref() {
    $this->initGFFImporterAttributes();

    $expected_accession = 'test_gene_dbx_001';
    $test_db = chado_get_db(['name' => 'TEST_DB']);
    $source_db = chado_get_db(['name' => 'GFF_source']);

    $gene = db_select('chado.feature', 'f')
      ->fields('f')
      ->condition('uniquename', $this->gene_1_uname)
      ->condition('type_id', $this->gene_cvt)
      ->execute()
      ->fetchObject();

    // Shared part of both lookups: dbxrefs attached to the gene.
    $test_db_select = db_select('chado.feature_dbxref', 'fdbx');
    $test_db_select->join('chado.dbxref', 'dbx', 'dbx.dbxref_id = fdbx.dbxref_id');
    $test_db_select->fields('dbx');
    $test_db_select->condition('fdbx.feature_id', $gene->feature_id);

    // Clone before the database filter is added so each query gets its own.
    $source_db_select = clone $test_db_select;

    $test_db_select->condition('dbx.db_id', $test_db->db_id);
    $test_db_result = $test_db_select->execute();

    $source_db_select->condition('dbx.db_id', $source_db->db_id);
    $source_db_result = $source_db_select->execute();

    $test_dbxref = $test_db_result->fetchObject();
    $source_dbxref = $source_db_result->fetchObject();

    $this->assertEquals($expected_accession, $test_dbxref->accession);
    $this->assertEquals($this->gene_1_uname, $source_dbxref->accession);
  }
  /**
   * Checks that the ontology term of the first gene is linked in chado.
   *
   * @group gff
   */
  public function testGFFImporterAttributeOntology() {
    $this->initGFFImporterAttributes();

    $gene = db_select('chado.feature', 'f')
      ->fields('f')
      ->condition('uniquename', $this->gene_1_uname)
      ->condition('type_id', $this->gene_cvt)
      ->execute()
      ->fetchObject();

    // The term is identified by its accession within the SO database.
    $so_term = chado_get_cvterm([
      'dbxref_id' => [
        'accession' => '0000704',
        'db_id' => ['name' => 'SO'],
      ],
    ]);

    $links = db_select('chado.feature_cvterm', 'fcvt')
      ->fields('fcvt')
      ->condition('cvterm_id', $so_term->cvterm_id)
      ->condition('feature_id', $gene->feature_id)
      ->execute();

    // Exactly one feature_cvterm row must connect the gene and the term.
    $this->assertEquals(1, $links->rowCount());
  }
  /**
   * Checks the part_of relationship between the mRNA and its parent gene.
   *
   * @group gff
   */
  public function testGFFImporterAttributeParent() {
    $this->initGFFImporterAttributes();

    $part_of_id = chado_get_cvterm([
      'name' => 'part_of',
      'cv_id' => ['name' => 'sequence'],
    ])->cvterm_id;

    $child = db_select('chado.feature', 'f')
      ->fields('f')
      ->condition('uniquename', 'test_mrna_001.1')
      ->condition('type_id', $this->mrna_cvt)
      ->execute()
      ->fetchObject();

    // The mRNA is the subject; the joined feature is the relationship object.
    $parent_select = db_select('chado.feature_relationship', 'fr');
    $parent_select->join('chado.feature', 'f', 'f.feature_id = fr.object_id');
    $parent_select
      ->fields('f')
      ->condition('fr.subject_id', $child->feature_id)
      ->condition('fr.type_id', $part_of_id);
    $parent_feature = $parent_select->execute()->fetchObject();

    $this->assertEquals('test_gene_001', $parent_feature->uniquename);
    $this->assertEquals('test_gene_001', $parent_feature->name);
    $this->assertEquals($this->gene_cvt, $parent_feature->type_id);
    $this->assertEquals($this->organism->organism_id, $parent_feature->organism_id);
  }
  /**
   * Checks the featureloc row placing the first gene on its target feature.
   *
   * @group gff
   */
  public function testGFFImporterAttributeTarget() {
    $this->initGFFImporterAttributes();

    $expected_fmin = 99;
    $expected_fmax = 200;

    $landmark_type_id = chado_get_cvterm([
      'name' => 'supercontig',
      'cv_id' => ['name' => 'sequence'],
    ])->cvterm_id;

    $landmark = db_select('chado.feature', 'f')
      ->fields('f')
      ->condition('uniquename', 'scaffold1')
      ->condition('type_id', $landmark_type_id)
      ->execute()
      ->fetchObject();

    $gene = db_select('chado.feature', 'f')
      ->fields('f')
      ->condition('uniquename', $this->gene_1_uname)
      ->condition('type_id', $this->gene_cvt)
      ->execute()
      ->fetchObject();

    // Location of the gene relative to the scaffold.
    $location = db_select('chado.featureloc', 'fl')
      ->fields('fl')
      ->condition('fl.feature_id', $gene->feature_id)
      ->condition('fl.srcfeature_id', $landmark->feature_id)
      ->execute()
      ->fetchObject();

    $this->assertEquals($expected_fmin, $location->fmin);
    $this->assertEquals($expected_fmax, $location->fmax);
  }
  /**
   * Ensure properties loaded correctly into chado.
   *
   * Looks up the Gap and Note properties (feature_property cv) attached to
   * the first gene and checks their value and rank.
   *
   * @group gff
   */
  public function testGFFImporterAttributeProperty() {
    $this->initGFFImporterAttributes();
    $gap_1 = 'test_gap_1';
    $note_val = 'test_gene_001_note';

    $gene_1 = db_select('chado.feature', 'f')
      ->fields('f')
      ->condition('uniquename', $this->gene_1_uname)
      ->condition('type_id', $this->gene_cvt)
      ->execute()
      ->fetchObject();
    // Fail with a readable message instead of dereferencing a missing row.
    $this->assertNotEmpty($gene_1, 'The gene feature was not found in chado.feature.');

    $gap_cvt = chado_get_cvterm([
      'name' => 'Gap',
      'cv_id' => ['name' => 'feature_property'],
    ])->cvterm_id;
    $note_cvt = chado_get_cvterm([
      'name' => 'Note',
      'cv_id' => ['name' => 'feature_property'],
    ])->cvterm_id;

    // Assert gaps loaded correctly.
    $gaps_query = db_select('chado.featureprop', 'fp')
      ->fields('fp')
      ->condition('feature_id', $gene_1->feature_id)
      ->condition('type_id', $gap_cvt)
      ->execute();

    // Index the gap properties by value. The array must exist even when the
    // query returns no rows, otherwise the lookups below hit an undefined
    // variable rather than a failed assertion.
    $gaps = [];
    while (($gap = $gaps_query->fetchObject())) {
      $gaps[$gap->value] = $gap;
    }
    $this->assertArrayHasKey($gap_1, $gaps, 'The expected Gap property was not loaded.');
    $this->assertEquals($gap_1, $gaps[$gap_1]->value);
    $this->assertEquals(0, $gaps[$gap_1]->rank);

    // Assert note loaded correctly.
    $note = db_select('chado.featureprop', 'fp')
      ->fields('fp')
      ->condition('feature_id', $gene_1->feature_id)
      ->condition('type_id', $note_cvt)
      ->execute()
      ->fetchObject();
    $this->assertNotEmpty($note, 'The expected Note property was not loaded.');
    $this->assertEquals($note_val, $note->value);
    $this->assertEquals(0, $note->rank);
  }
  /**
   * Checks the derives_from relationship from the second gene to the first.
   *
   * @group gff
   */
  public function testGFFImporterAttributeDerivesFrom() {
    $this->initGFFImporterAttributes();

    $derived_gene = db_select('chado.feature', 'f')
      ->fields('f')
      ->condition('uniquename', $this->gene_2_uname)
      ->condition('type_id', $this->gene_cvt)
      ->execute()
      ->fetchObject();

    $derives_from_id = chado_get_cvterm([
      'name' => 'derives_from',
      'cv_id' => ['name' => 'sequence'],
    ])->cvterm_id;

    // The second gene is the subject; fetch the feature it derives from.
    $origin_select = db_select('chado.feature', 'f');
    $origin_select->join('chado.feature_relationship', 'fr', 'f.feature_id = fr.object_id');
    $origin_select
      ->fields('f')
      ->condition('fr.subject_id', $derived_gene->feature_id)
      ->condition('fr.type_id', $derives_from_id);
    $origin = $origin_select->execute()->fetchObject();

    $this->assertEquals($this->gene_1_uname, $origin->uniquename);
    $this->assertEquals($this->gene_1_uname, $origin->name);
    $this->assertEquals($this->gene_cvt, $origin->type_id);
  }
  /**
   * Checks the sequence data stored for the scaffold feature.
   *
   * @group gff
   */
  public function testGFFImporterAttributeFastas() {
    $this->initGFFImporterAttributes();

    $landmark = db_select('chado.feature', 'f')
      ->fields('f')
      ->condition('uniquename', $this->scaffold_1_uname)
      ->condition('type_id', $this->supercontig_cvt)
      ->execute()
      ->fetchObject();

    // The recorded length, the stored residues and the checksum must agree.
    $expected_length = 720;
    $this->assertEquals($expected_length, $landmark->seqlen);
    $this->assertEquals($expected_length, strlen($landmark->residues));
    $this->assertEquals('83578d8afdaec399c682aa6c0ddd29c9', $landmark->md5checksum);
  }
  /**
   * Test that when checked, explicit proteins are created when specified within
   * the GFF file. Explicit proteins will not respect the skip_protein argument
   * and will therefore be added to the database.
   *
   * Loads simpleGFF.gff with skip_protein enabled and expects exactly one
   * polypeptide feature with the explicit protein's unique name.
   *
   * @group gff
   * @ticket 77
   */
  public function testGFFPolypeptide() {
    $gff_file = ['file_local' => __DIR__ . '/../data/simpleGFF.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      // The new argument.
      'skip_protein' => 1,
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // Regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // Optional.
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];

    $this->loadLandmarks($analysis, $organism);
    $this->runGFFLoader($run_args, $gff_file);

    // Use the same cvterm lookup helper as the other tests in this class.
    $polypeptide_term = chado_get_cvterm([
      'cv_id' => ['name' => 'sequence'],
      'name' => 'polypeptide',
    ]);

    $name = 'FRAEX38873_v2_000000010.1.3_test_protein';
    $proteins = db_select('chado.feature', 'f')
      ->fields('f', ['uniquename'])
      ->condition('f.uniquename', $name)
      ->condition('f.type_id', $polypeptide_term->cvterm_id)
      ->execute()
      ->fetchAll();

    $this->assertCount(1, $proteins);
  }
  /**
   * Add a skip protein option. Test that when checked, implicit proteins are
   * not created, but that they are created when unchecked.
   *
   * The same GFF file is loaded twice: first with skip_protein enabled, then
   * with it disabled.
   *
   * @group gff
   * @ticket 77
   */
  public function testGFFNoProteinOption() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_protein_generation.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      // Skip protein feature generation.
      'skip_protein' => 1,
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // Regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // Optional.
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];

    $this->loadLandmarks($analysis, $organism);
    $this->runGFFLoader($run_args, $gff_file);

    // Use the same cvterm lookup helper as the other tests in this class.
    $polypeptide_term = chado_get_cvterm([
      'cv_id' => ['name' => 'sequence'],
      'name' => 'polypeptide',
    ]);

    $name = "FRAEX38873_v2_000000190.1-protein";
    $results = db_select('chado.feature', 'f')
      ->fields('f', ['uniquename'])
      ->condition('f.uniquename', $name)
      ->condition('f.type_id', $polypeptide_term->cvterm_id)
      ->execute()
      ->fetchAll();

    // There should be no proteins since we used the skip_proteins flag and no
    // explicit proteins were specified in the test file.
    $this->assertCount(0, $results);

    // Now perform a unit test where we do not skip proteins generation.
    $run_args['skip_protein'] = 0;
    $this->runGFFLoader($run_args, $gff_file);

    $protein = db_select('chado.feature', 'f')
      ->fields('f', ['uniquename'])
      ->condition('f.uniquename', $name)
      ->condition('f.type_id', $polypeptide_term->cvterm_id)
      ->execute()
      ->fetchObject();

    // Guard against a missing row so a failure is reported as an assertion
    // rather than as a property access on a non-object.
    $this->assertNotEmpty($protein, 'The implicit protein feature was not created.');
    $this->assertEquals($name, $protein->uniquename);
  }
  /**
   * Explicitly defined proteins must be imported even with skip_protein set.
   *
   * @group gff
   * @ticket 77
   */
  public function testGFFImporterLoadsExplicitProteins() {
    $simple_gff = ['file_local' => __DIR__ . '/../data/simpleGFF.gff'];
    $new_analysis = factory('chado.analysis')->create();
    $new_organism = factory('chado.organism')->create();

    $importer_args = [
      // Ask the importer to skip protein generation.
      'skip_protein' => 1,
      'analysis_id' => $new_analysis->analysis_id,
      'organism_id' => $new_organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // Regular expressions for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // Optional arguments.
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];

    $this->loadLandmarks($new_analysis, $new_organism);
    $this->runGFFLoader($importer_args, $simple_gff);

    $expected_uname = 'FRAEX38873_v2_000000010.1.3_test_protein';

    // Only the uniquename column is selected, so fetchField() returns it.
    $stored_uname = db_select('chado.feature', 'f')
      ->fields('f', ['uniquename'])
      ->condition('f.uniquename', $expected_uname)
      ->execute()
      ->fetchField();

    $this->assertEquals($expected_uname, $stored_uname);
  }
  /**
   * Runs the GFF3 importer with the given arguments on the given file.
   *
   * @param array $run_args
   *   Arguments handed to the importer's create() method.
   * @param array $file
   *   File description handed to create(); callers in this class pass an
   *   array with a 'file_local' key.
   */
  private function runGFFLoader($run_args, $file) {
    // Make sure the importer class is loaded before it is instantiated.
    module_load_include('inc', 'tripal_chado', 'includes/TripalImporter/GFF3Importer');

    $gff_loader = new \GFF3Importer();
    $gff_loader->create($run_args, $file);
    $gff_loader->prepareFiles();
    $gff_loader->run();
  }
  /**
   * Imports a FASTA file of landmark sequences as supercontig features.
   *
   * @param object $analysis
   *   Record whose analysis_id is passed to the FASTA importer.
   * @param object $organism
   *   Record whose organism_id is passed to the FASTA importer.
   * @param array $landmark_file
   *   File description for the importer. When empty, the bundled
   *   empty_landmarks.fasta file is used.
   */
  private function loadLandmarks($analysis, $organism, $landmark_file = array()) {
    // Fall back to the default landmark file when none was supplied.
    $fasta = $landmark_file ?: ['file_local' => __DIR__ . '/../data/empty_landmarks.fasta'];

    $fasta_args = [
      'organism_id' => $organism->organism_id,
      'analysis_id' => $analysis->analysis_id,
      'seqtype' => 'supercontig',
      // Default method: insert and update.
      'method' => 2,
      // Default match type: unique name.
      'match_type' => 1,
      // Optional arguments.
      're_name' => NULL,
      're_uname' => NULL,
      're_accession' => NULL,
      'db_id' => NULL,
      'rel_type' => NULL,
      're_subject' => NULL,
      'parent_type' => NULL,
    ];

    module_load_include('inc', 'tripal_chado', 'includes/TripalImporter/FASTAImporter');

    $fasta_loader = new \FASTAImporter();
    $fasta_loader->create($fasta_args, $fasta);
    $fasta_loader->prepareFiles();
    $fasta_loader->run();
  }
  1212. }