GFF3ImporterTest.php 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938
  1. <?php
  2. namespace Tests;
  3. use StatonLab\TripalTestSuite\DBTransaction;
  4. use StatonLab\TripalTestSuite\TripalTestCase;
  5. class GFF3ImporterTest extends TripalTestCase {
  6. use DBTransaction;
  /**
   * Smoke test for the GFF3 importer.
   *
   * Loads short_scaffold.fasta as landmarks, imports small_gene.gff and then
   * looks up the feature with uniquename "test_protein_001.1" in
   * chado.feature.
   *
   * @group gff
   */
  public function testGFFImporter() {
    $gff_source = ['file_local' => __DIR__ . '/../data/small_gene.gff'];
    $landmark_source = ['file_local' => __DIR__ . '/../data/short_scaffold.fasta'];

    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();

    $importer_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // Regular expressions for mRNA and protein names (unused here).
      're_mrna' => NULL,
      're_protein' => NULL,
      // Optional arguments.
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];

    $this->loadLandmarks($analysis, $organism, $landmark_source);
    $this->runGFFLoader($importer_args, $gff_source);

    // The fixture declares this protein / polypeptide explicitly.
    $expected_uniquename = 'test_protein_001.1';

    $select = db_select('chado.feature', 'f');
    $select->fields('f', ['uniquename']);
    $select->condition('f.uniquename', $expected_uniquename);
    $found_uniquename = $select->execute()->fetchField();

    $this->assertEquals($expected_uniquename, $found_uniquename);
  }
  /**
   * Run the GFF loader on gff_unescaped_ids.gff for testing.
   *
   * The fixture contains an ID with unescaped whitespace, which the test
   * expects the GFF loader to reject by throwing an exception.
   *
   * Landmarks are loaded outside the try block so that a failure in the
   * fixture setup cannot be mistaken for the exception under test.
   */
  public function testGFFImporterUnescapedWhitespaceID() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_unescaped_ids.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // optional
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];

    // Setup step: must succeed, so it is deliberately not guarded.
    $this->loadLandmarks($analysis, $organism);

    $hasException = FALSE;
    try {
      // Expected to throw because of the unescaped whitespace in the ID.
      $this->runGFFLoader($run_args, $gff_file);
    }
    catch (\Exception $ex) {
      $hasException = TRUE;
    }

    $this->assertTrue(
      $hasException,
      'The GFF loader should throw an exception for an ID with unescaped whitespace.'
    );
  }
  /**
   * Run the GFF loader on gff_rightarrow_id.gff for testing.
   *
   * The fixture contains an ID that begins with a right arrow (>), which the
   * test expects the GFF loader to reject by throwing an exception.
   *
   * Landmarks are loaded outside the try block so that a failure in the
   * fixture setup cannot be mistaken for the exception under test.
   */
  public function testGFFImporterRightArrowID() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_rightarrow_id.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // optional
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];

    // Setup step: must succeed, so it is deliberately not guarded.
    $this->loadLandmarks($analysis, $organism);

    $hasException = FALSE;
    try {
      // Expected to throw because of the right arrow in the ID.
      $this->runGFFLoader($run_args, $gff_file);
    }
    catch (\Exception $ex) {
      $hasException = TRUE;
    }

    $this->assertTrue(
      $hasException,
      'The GFF loader should throw an exception for an ID that starts with a right arrow.'
    );
  }
  /**
   * Run the GFF loader on gff_duplicate_ids.gff for testing.
   *
   * The fixture contains duplicate IDs, which makes a GFF file invalid since
   * IDs should be unique. The test expects the GFF loader to throw an
   * exception.
   *
   * Landmarks are loaded outside the try block so that a failure in the
   * fixture setup cannot be mistaken for the exception under test.
   */
  public function testGFFImporterDuplicateIDsExceptionCheck() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_duplicate_ids.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // optional
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];

    // Setup step: must succeed, so it is deliberately not guarded.
    $this->loadLandmarks($analysis, $organism);

    $hasException = FALSE;
    try {
      // Expected to throw because of the duplicate feature ID.
      $this->runGFFLoader($run_args, $gff_file);
    }
    catch (\Exception $ex) {
      $hasException = TRUE;
    }

    $this->assertTrue(
      $hasException,
      'The GFF loader should throw an exception for duplicate feature IDs.'
    );
  }
  /**
   * Run the GFF loader on gff_invalidstartend.gff for testing.
   *
   * The test expects the import to succeed and the feature
   * "FRAEX38873_v2_000000010" to be stored exactly once, i.e. the loader is
   * expected to cope with the start/end values in the fixture rather than
   * reject the record.
   */
  public function testGFFImporterInvalidStartEnd() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_invalidstartend.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // optional
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism);
    // No exception is expected here: the import should complete.
    $this->runGFFLoader($run_args, $gff_file);
    $results = db_select('chado.feature', 'f')
      ->fields('f', ['uniquename'])
      ->condition('f.uniquename', 'FRAEX38873_v2_000000010')
      ->execute()
      ->fetchAll();
    // We expect the feature to still be added to the database
    // since the GFF Loader caters for reversing backward numbers.
    $this->assertCount(1, $results);
  }
  /**
   * Run the GFF loader on gff_score.gff for testing.
   *
   * Checks that integer, decimal and negative score values end up in the
   * significance column of chado.analysisfeature for the analysis created by
   * this test. Each expected score must actually be found; a missing row is a
   * failure rather than a silently skipped assertion.
   */
  public function testGFFImporterScoreTest() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_score.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // optional
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism);
    $this->runGFFLoader($run_args, $gff_file);

    // Each pair is [value bound into the query, value expected back]. The
    // bound value is a string so it is not subject to locale-dependent float
    // formatting.
    $expected_scores = [
      // Integer score.
      ['2', 2],
      // Decimal/float score.
      ['2.5', 2.5],
      // Negative score.
      ['-2.5', -2.5],
    ];

    foreach ($expected_scores as $pair) {
      $bound_score = $pair[0];
      $expected = $pair[1];

      // Restrict the lookup to this test's analysis so pre-existing rows
      // cannot satisfy the check.
      $significance = db_query(
        'SELECT significance FROM chado.analysisfeature WHERE analysis_id = :analysis_id AND significance = :score LIMIT 1',
        [
          ':analysis_id' => $analysis->analysis_id,
          ':score' => $bound_score,
        ]
      )->fetchField();

      $this->assertNotFalse(
        $significance,
        "No analysisfeature record with significance $bound_score was found."
      );
      $this->assertEquals($expected, $significance);
    }
  }
  /**
   * Run the GFF loader on gff_strand.gff for testing.
   *
   * Checks the strand value stored in chado.featureloc for four features of
   * the fixture and the total number of featureloc records. Every feature
   * must have a featureloc row; a missing row is a failure rather than a
   * silently skipped assertion.
   */
  public function testGFFImporterStrandTest() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_strand.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // optional
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism);
    $this->runGFFLoader($run_args, $gff_file);

    // Strand data gets saved in chado.featureloc. Expected value per feature
    // uniquename; the trailing comment is the strand symbol in the GFF file.
    $expected_strands = [
      'FRAEX38873_v2_000000010' => 1, // +
      'FRAEX38873_v2_000000010.1' => -1, // -
      'FRAEX38873_v2_000000010.2' => 0, // ?
      'FRAEX38873_v2_000000010.3' => 0, // .
    ];

    foreach ($expected_strands as $uniquename => $expected_strand) {
      $row = db_query('SELECT * FROM chado.featureloc fl
        LEFT JOIN chado.feature f ON (fl.feature_id = f.feature_id)
        WHERE uniquename = :uniquename LIMIT 1',
        [
          ':uniquename' => $uniquename,
        ]
      )->fetchObject();

      $this->assertNotEmpty(
        $row,
        "No featureloc record was found for feature $uniquename."
      );
      $this->assertEquals($expected_strand, $row->strand);
    }

    // This GFF should create 5 featureloc records. Note that this counts
    // every row in chado.featureloc, not only rows created by this test.
    $featureloc_count = db_query('SELECT COUNT(*) as c FROM chado.featureloc;')
      ->fetchField();
    $this->assertEquals(5, $featureloc_count);
  }
  /**
   * Shared fixture for the testGFFImporterAttribute*() methods.
   *
   * Loads short_scaffold.fasta as landmarks and imports small_gene.gff, then
   * stores on the test instance the organism, the analysis, the cvterm ids of
   * "gene", "mRNA" and "supercontig" from the "sequence" vocabulary, and the
   * uniquenames the attribute tests look up.
   */
  private function initGFFImporterAttributes() {
    $gff_source = ['file_local' => __DIR__ . '/../data/small_gene.gff'];
    $landmark_source = ['file_local' => __DIR__ . '/../data/short_scaffold.fasta'];

    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();

    $importer_args = [
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // Regular expressions for mRNA and protein names (unused here).
      're_mrna' => NULL,
      're_protein' => NULL,
      // Optional arguments; the target organism is the test organism.
      'target_organism_id' => $organism->organism_id,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];

    $this->loadLandmarks($analysis, $organism, $landmark_source);
    $this->runGFFLoader($importer_args, $gff_source);

    $this->organism = $organism;
    $this->analysis = $analysis;

    // Resolves a term name in the "sequence" vocabulary to its cvterm id.
    $sequence_term_id = function ($term_name) {
      $term = chado_get_cvterm([
        'name' => $term_name,
        'cv_id' => [
          'name' => 'sequence',
        ],
      ]);
      return $term->cvterm_id;
    };

    $this->gene_cvt = $sequence_term_id('gene');
    $this->mrna_cvt = $sequence_term_id('mRNA');
    $this->supercontig_cvt = $sequence_term_id('supercontig');

    $this->gene_1_uname = 'test_gene_001';
    $this->gene_2_uname = 'test_gene_002';
    $this->scaffold_1_uname = 'scaffold1';
  }
  /**
   * Verifies the core chado.feature record of the first test gene.
   *
   * Checks uniquename, name, organism and type of the gene feature after the
   * shared attribute fixture has been imported.
   *
   * @group gff
   */
  public function testGFFImporterAttributeFeature() {
    $this->initGFFImporterAttributes();
    $expected_organism = $this->organism;

    $select = db_select('chado.feature', 'f');
    $select->fields('f');
    $select->condition('uniquename', $this->gene_1_uname);
    $select->condition('type_id', $this->gene_cvt);
    $gene = $select->execute()->fetchObject();

    $this->assertEquals('test_gene_001', $gene->uniquename);
    $this->assertEquals('test_gene_001', $gene->name);
    $this->assertEquals($expected_organism->organism_id, $gene->organism_id);
    $this->assertEquals($this->gene_cvt, $gene->type_id);
  }
  /**
   * Verifies that the alias of the first test gene is stored as a synonym.
   *
   * Reads the synonym linked to the gene through chado.feature_synonym and
   * compares its name with "first_test_gene".
   *
   * @group gff
   */
  public function testGFFImporterAttributeAlias() {
    $this->initGFFImporterAttributes();
    $expected_alias = 'first_test_gene';

    $gene_select = db_select('chado.feature', 'f');
    $gene_select->fields('f');
    $gene_select->condition('uniquename', $this->gene_1_uname);
    $gene_select->condition('type_id', $this->gene_cvt);
    $gene = $gene_select->execute()->fetchObject();

    $synonym_select = db_select('chado.feature_synonym', 'fs');
    $synonym_select->join('chado.synonym', 's', 's.synonym_id = fs.synonym_id');
    $synonym = $synonym_select
      ->fields('s')
      ->condition('fs.feature_id', $gene->feature_id)
      ->execute()
      ->fetchObject();

    $this->assertEquals($expected_alias, $synonym->name);
  }
  /**
   * Verifies the database cross references of the first test gene.
   *
   * Two dbxref records linked to the gene are checked: the one in the
   * "TEST_DB" database must have accession "test_gene_dbx_001" and the one in
   * the "GFF_source" database must have the gene's uniquename as accession.
   *
   * @group gff
   */
  public function testGFFImporterAttributeDbxref() {
    $this->initGFFImporterAttributes();

    $expected_accession = 'test_gene_dbx_001';
    $test_db = chado_get_db(['name' => 'TEST_DB']);
    $gff_db = chado_get_db(['name' => 'GFF_source']);

    $gene_select = db_select('chado.feature', 'f');
    $gene_select->fields('f');
    $gene_select->condition('uniquename', $this->gene_1_uname);
    $gene_select->condition('type_id', $this->gene_cvt);
    $gene = $gene_select->execute()->fetchObject();

    // Base query: every dbxref attached to the gene. It is cloned before any
    // database filter is added so each lookup gets its own db_id condition.
    $test_db_select = db_select('chado.feature_dbxref', 'fdbx');
    $test_db_select->join('chado.dbxref', 'dbx', 'dbx.dbxref_id = fdbx.dbxref_id');
    $test_db_select->fields('dbx');
    $test_db_select->condition('fdbx.feature_id', $gene->feature_id);
    $gff_db_select = clone $test_db_select;

    $test_db_select->condition('dbx.db_id', $test_db->db_id);
    $test_dbxref = $test_db_select->execute()->fetchObject();

    $gff_db_select->condition('dbx.db_id', $gff_db->db_id);
    $gff_dbxref = $gff_db_select->execute()->fetchObject();

    $this->assertEquals($expected_accession, $test_dbxref->accession);
    $this->assertEquals($this->gene_1_uname, $gff_dbxref->accession);
  }
  /**
   * Ensures ontology term records loaded correctly into chado.
   *
   * Looks up the cvterm with accession "0000704" in the "SO" database and
   * expects exactly one chado.feature_cvterm record linking it to the first
   * test gene.
   *
   * @group gff
   */
  public function testGFFImporterAttributeOntology() {
    $this->initGFFImporterAttributes();
    $ontology_db = 'SO';
    $ontology_accession = '0000704';
    $gene_1 = db_select('chado.feature', 'f')
      ->fields('f')
      ->condition('uniquename', $this->gene_1_uname)
      ->condition('type_id', $this->gene_cvt)
      ->execute()->fetchObject();
    $term = chado_get_cvterm([
      'dbxref_id' => [
        'accession' => $ontology_accession,
        'db_id' => [
          'name' => $ontology_db,
        ],
      ],
    ]);
    // Fetch the rows and count them instead of calling rowCount(): the row
    // count of a SELECT statement is not guaranteed by every database driver.
    $feature_cvterms = db_select('chado.feature_cvterm', 'fcvt')
      ->fields('fcvt')
      ->condition('cvterm_id', $term->cvterm_id)
      ->condition('feature_id', $gene_1->feature_id)
      ->execute()
      ->fetchAll();
    $this->assertCount(1, $feature_cvterms);
  }
  /**
   * Verifies the parent relationship of the first test mRNA.
   *
   * Follows the "part_of" relationship whose subject is the mRNA
   * "test_mrna_001.1" and checks that the object feature is the first test
   * gene (uniquename, name, type and organism).
   *
   * @group gff
   */
  public function testGFFImporterAttributeParent() {
    $this->initGFFImporterAttributes();

    $part_of = chado_get_cvterm([
      'name' => 'part_of',
      'cv_id' => [
        'name' => 'sequence',
      ],
    ]);
    $part_of_id = $part_of->cvterm_id;

    $mrna_select = db_select('chado.feature', 'f');
    $mrna_select->fields('f');
    $mrna_select->condition('uniquename', 'test_mrna_001.1');
    $mrna_select->condition('type_id', $this->mrna_cvt);
    $mrna = $mrna_select->execute()->fetchObject();

    $parent_select = db_select('chado.feature_relationship', 'fr');
    $parent_select->join('chado.feature', 'f', 'f.feature_id = fr.object_id');
    $parent = $parent_select
      ->fields('f')
      ->condition('fr.subject_id', $mrna->feature_id)
      ->condition('fr.type_id', $part_of_id)
      ->execute()
      ->fetchObject();

    $this->assertEquals('test_gene_001', $parent->uniquename);
    $this->assertEquals('test_gene_001', $parent->name);
    $this->assertEquals($this->gene_cvt, $parent->type_id);
    $this->assertEquals($this->organism->organism_id, $parent->organism_id);
  }
  /**
   * Verifies the location of the first test gene on its scaffold.
   *
   * Reads the chado.featureloc record that places the gene on the
   * supercontig "scaffold1" and checks fmin (99) and fmax (200).
   *
   * @group gff
   */
  public function testGFFImporterAttributeTarget() {
    $this->initGFFImporterAttributes();

    $expected_fmin = 99;
    $expected_fmax = 200;

    $supercontig = chado_get_cvterm([
      'name' => 'supercontig',
      'cv_id' => [
        'name' => 'sequence',
      ],
    ]);
    $supercontig_id = $supercontig->cvterm_id;

    $scaffold_select = db_select('chado.feature', 'f');
    $scaffold_select->fields('f');
    $scaffold_select->condition('uniquename', 'scaffold1');
    $scaffold_select->condition('type_id', $supercontig_id);
    $scaffold = $scaffold_select->execute()->fetchObject();

    $gene_select = db_select('chado.feature', 'f');
    $gene_select->fields('f');
    $gene_select->condition('uniquename', $this->gene_1_uname);
    $gene_select->condition('type_id', $this->gene_cvt);
    $gene = $gene_select->execute()->fetchObject();

    $loc_select = db_select('chado.featureloc', 'fl');
    $loc_select->fields('fl');
    $loc_select->condition('fl.feature_id', $gene->feature_id);
    $loc_select->condition('fl.srcfeature_id', $scaffold->feature_id);
    $location = $loc_select->execute()->fetchObject();

    $this->assertEquals($expected_fmin, $location->fmin);
    $this->assertEquals($expected_fmax, $location->fmax);
  }
  /**
   * Ensure properties loaded correctly into chado.
   *
   * Checks two kinds of chado.featureprop records on the first test gene:
   * a "Gap" property with value "test_gap_1" at rank 0 and a "Note" property
   * with value "test_gene_001_note" at rank 0.
   *
   * @group gff
   */
  public function testGFFImporterAttributeProperty() {
    $this->initGFFImporterAttributes();
    $gap_1 = 'test_gap_1';
    $note_val = 'test_gene_001_note';
    $gene_1 = db_select('chado.feature', 'f')
      ->fields('f')
      ->condition('uniquename', $this->gene_1_uname)
      ->condition('type_id', $this->gene_cvt)
      ->execute()->fetchObject();
    $gap_cvt = chado_get_cvterm([
      'name' => 'Gap',
      'cv_id' => [
        'name' => 'feature_property',
      ],
    ])->cvterm_id;
    $note_cvt = chado_get_cvterm([
      'name' => 'Note',
      'cv_id' => [
        'name' => 'feature_property',
      ],
    ])->cvterm_id;

    // Assert gaps loaded correctly. The gap properties are indexed by value;
    // the array starts empty so that a query without rows leads to a clear
    // assertion failure below instead of an undefined variable.
    $gaps_query = db_select('chado.featureprop', 'fp')
      ->fields('fp')
      ->condition('feature_id', $gene_1->feature_id)
      ->condition('type_id', $gap_cvt)
      ->execute();
    $gaps = [];
    while (($gap = $gaps_query->fetchObject())) {
      $gaps[$gap->value] = $gap;
    }
    $this->assertArrayHasKey(
      $gap_1,
      $gaps,
      "No Gap property with value $gap_1 was found for the gene."
    );
    $this->assertEquals($gap_1, $gaps[$gap_1]->value);
    $this->assertEquals(0, $gaps[$gap_1]->rank);

    // Assert note loaded correctly.
    $note = db_select('chado.featureprop', 'fp')
      ->fields('fp')
      ->condition('feature_id', $gene_1->feature_id)
      ->condition('type_id', $note_cvt)
      ->execute()->fetchObject();
    $this->assertEquals($note_val, $note->value);
    $this->assertEquals(0, $note->rank);
  }
  /**
   * Verifies the "derives_from" relationship of the second test gene.
   *
   * Follows the derives_from relationship whose subject is the second test
   * gene and checks that the object feature is the first test gene
   * (uniquename, name and type).
   *
   * @group gff
   */
  public function testGFFImporterAttributeDerivesFrom() {
    $this->initGFFImporterAttributes();

    $gene_select = db_select('chado.feature', 'f');
    $gene_select->fields('f');
    $gene_select->condition('uniquename', $this->gene_2_uname);
    $gene_select->condition('type_id', $this->gene_cvt);
    $derived_gene = $gene_select->execute()->fetchObject();

    $derives_from = chado_get_cvterm([
      'name' => 'derives_from',
      'cv_id' => [
        'name' => 'sequence',
      ],
    ]);
    $derives_from_id = $derives_from->cvterm_id;

    $origin_select = db_select('chado.feature', 'f');
    $origin_select->join('chado.feature_relationship', 'fr', 'f.feature_id = fr.object_id');
    $origin = $origin_select
      ->fields('f')
      ->condition('fr.subject_id', $derived_gene->feature_id)
      ->condition('fr.type_id', $derives_from_id)
      ->execute()
      ->fetchObject();

    $this->assertEquals($this->gene_1_uname, $origin->uniquename);
    $this->assertEquals($this->gene_1_uname, $origin->name);
    $this->assertEquals($this->gene_cvt, $origin->type_id);
  }
  /**
   * Verifies the sequence data stored for the scaffold landmark.
   *
   * Checks the recorded sequence length, the length of the stored residues
   * and the MD5 checksum of the supercontig feature "scaffold1".
   *
   * @group gff
   */
  public function testGFFImporterAttributeFastas() {
    $this->initGFFImporterAttributes();

    $select = db_select('chado.feature', 'f');
    $select->fields('f');
    $select->condition('uniquename', $this->scaffold_1_uname);
    $select->condition('type_id', $this->supercontig_cvt);
    $landmark = $select->execute()->fetchObject();

    $residue_length = strlen($landmark->residues);

    $this->assertEquals(720, $landmark->seqlen);
    $this->assertEquals(720, $residue_length);
    $this->assertEquals('83578d8afdaec399c682aa6c0ddd29c9', $landmark->md5checksum);
  }
  /**
   * Test that explicit proteins are created when specified within the GFF
   * file, even when skip_protein is set. Explicit proteins will not respect
   * the skip_protein argument and will therefore be added to the database.
   *
   * Imports simpleGFF.gff with skip_protein enabled and expects exactly one
   * polypeptide feature named "FRAEX38873_v2_000000010.1.3_test_protein".
   *
   * @group gff
   * @ticket 77
   */
  public function testGFFPolypeptide() {
    $gff_file = ['file_local' => __DIR__ . '/../data/simpleGFF.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      // The new argument
      'skip_protein' => 1,
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // optional
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism);
    $this->runGFFLoader($run_args, $gff_file);
    $identifier = [
      'cv_id' => ['name' => 'sequence'],
      'name' => 'polypeptide',
    ];
    // Same cvterm lookup function as the other tests in this class.
    $protein_type_id = chado_get_cvterm($identifier);
    $name = 'FRAEX38873_v2_000000010.1.3_test_protein';
    $query = db_select('chado.feature', 'f')
      ->fields('f', ['uniquename'])
      ->condition('f.uniquename', $name)
      ->condition('f.type_id', $protein_type_id->cvterm_id)
      ->execute()
      ->fetchAll();
    $this->assertCount(1, $query);
  }
  /**
   * Add a skip protein option. Test that when checked, implicit proteins are
   * not created, but that they are created when unchecked.
   *
   * Imports gff_protein_generation.gff twice: first with skip_protein set,
   * expecting no polypeptide named "FRAEX38873_v2_000000190.1-protein", then
   * with skip_protein cleared, expecting that polypeptide to exist.
   *
   * @group gff
   * @ticket 77
   */
  public function testGFFNoProteinOption() {
    $gff_file = ['file_local' => __DIR__ . '/../data/gff_protein_generation.gff'];
    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();
    $run_args = [
      // Skip protein feature generation
      'skip_protein' => 1,
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // regexps for mRNA and protein.
      're_mrna' => NULL,
      're_protein' => NULL,
      // optional
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];
    $this->loadLandmarks($analysis, $organism);
    $this->runGFFLoader($run_args, $gff_file);
    $identifier = [
      'cv_id' => ['name' => 'sequence'],
      'name' => 'polypeptide',
    ];
    // Same cvterm lookup function as the other tests in this class.
    $protein_type_id = chado_get_cvterm($identifier);
    $name = "FRAEX38873_v2_000000190.1-protein";
    $results = db_select('chado.feature', 'f')
      ->fields('f', ['uniquename'])
      ->condition('f.uniquename', $name)
      ->condition('f.type_id', $protein_type_id->cvterm_id)
      ->execute()
      ->fetchAll();
    // There should be no proteins since we used the skip_proteins flag and no
    // explicit proteins were specified in the test file.
    $this->assertCount(0, $results);

    // Now perform a unit test where we do not skip proteins generation.
    $run_args['skip_protein'] = 0;
    $this->runGFFLoader($run_args, $gff_file);
    $protein = db_select('chado.feature', 'f')
      ->fields('f', ['uniquename'])
      ->condition('f.uniquename', $name)
      ->condition('f.type_id', $protein_type_id->cvterm_id)
      ->execute()
      ->fetchObject();
    // Fail with a readable message when the protein is missing instead of
    // dereferencing an empty result.
    $this->assertNotEmpty(
      $protein,
      "The protein $name was not created although skip_protein was disabled."
    );
    $this->assertEquals($name, $protein->uniquename);
  }
  /**
   * Explicitly defined proteins must be imported even with skip_protein set.
   *
   * Imports simpleGFF.gff with skip_protein enabled and looks up the feature
   * "FRAEX38873_v2_000000010.1.3_test_protein" by uniquename.
   *
   * @group gff
   * @ticket 77
   */
  public function testGFFImporterLoadsExplicitProteins() {
    $gff_source = ['file_local' => __DIR__ . '/../data/simpleGFF.gff'];

    $analysis = factory('chado.analysis')->create();
    $organism = factory('chado.organism')->create();

    $importer_args = [
      // Ask the importer to skip protein generation.
      'skip_protein' => 1,
      'analysis_id' => $analysis->analysis_id,
      'organism_id' => $organism->organism_id,
      'use_transaction' => 1,
      'add_only' => 0,
      'update' => 1,
      'create_organism' => 0,
      'create_target' => 0,
      // Regular expressions for mRNA and protein names (unused here).
      're_mrna' => NULL,
      're_protein' => NULL,
      // Optional arguments.
      'target_organism_id' => NULL,
      'target_type' => NULL,
      'start_line' => NULL,
      'landmark_type' => NULL,
      'alt_id_attr' => NULL,
    ];

    $this->loadLandmarks($analysis, $organism);
    $this->runGFFLoader($importer_args, $gff_source);

    $expected_uniquename = 'FRAEX38873_v2_000000010.1.3_test_protein';

    $select = db_select('chado.feature', 'f');
    $select->fields('f', ['uniquename']);
    $select->condition('f.uniquename', $expected_uniquename);
    $found_uniquename = $select->execute()->fetchField();

    $this->assertEquals($expected_uniquename, $found_uniquename);
  }
  /**
   * Runs the GFF3 importer on a file.
   *
   * Loads the GFF3Importer include of the tripal_chado module, then creates
   * the importer with the given arguments, prepares its files and runs it.
   *
   * @param array $run_args
   *   Arguments handed to GFF3Importer::create().
   * @param array $file
   *   File descriptor handed to GFF3Importer::create(); the tests in this
   *   class pass an array with a 'file_local' path.
   */
  private function runGFFLoader($run_args, $file) {
    module_load_include('inc', 'tripal_chado', 'includes/TripalImporter/GFF3Importer');

    $gff_importer = new \GFF3Importer();
    $gff_importer->create($run_args, $file);
    $gff_importer->prepareFiles();
    $gff_importer->run();
  }
  /**
   * Loads landmark sequences with the FASTA importer.
   *
   * Runs FASTAImporter with sequence type "supercontig" for the given
   * analysis and organism. When no landmark file is supplied, the
   * empty_landmarks.fasta fixture is used.
   *
   * @param object $analysis
   *   Analysis record; its analysis_id is passed to the importer.
   * @param object $organism
   *   Organism record; its organism_id is passed to the importer.
   * @param array $landmark_file
   *   Optional file descriptor, e.g. ['file_local' => '/path/to/file.fasta'].
   */
  private function loadLandmarks($analysis, $organism, $landmark_file = array()) {
    $fasta_source = empty($landmark_file)
      ? ['file_local' => __DIR__ . '/../data/empty_landmarks.fasta']
      : $landmark_file;

    $importer_args = [
      'organism_id' => $organism->organism_id,
      'analysis_id' => $analysis->analysis_id,
      'seqtype' => 'supercontig',
      // Default method: insert and update.
      'method' => 2,
      // Default match type: unique name.
      'match_type' => 1,
      // Optional arguments.
      're_name' => NULL,
      're_uname' => NULL,
      're_accession' => NULL,
      'db_id' => NULL,
      'rel_type' => NULL,
      're_subject' => NULL,
      'parent_type' => NULL,
    ];

    module_load_include('inc', 'tripal_chado', 'includes/TripalImporter/FASTAImporter');

    $fasta_importer = new \FASTAImporter();
    $fasta_importer->create($importer_args, $fasta_source);
    $fasta_importer->prepareFiles();
    $fasta_importer->run();
  }
  833. }