DevSeedSeeder.php 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383
  1. <?php
  2. namespace Tests\DatabaseSeeders;
  3. use StatonLab\TripalTestSuite\Database\Seeder;
  4. use \Exception;
  5. class DevSeedSeeder extends Seeder
  6. {
  7. /**
  8. * Part one:
  9. * Chado records.
  10. * These are records used by many of the below importers.
  11. * ALL importers require an organism.
  12. * The expression data loader will associate the data with the
  13. * $expression_analysis record. All other importers associate data with the
  14. * $sequence_analysis. Uncomment the array to create that chado record.
  15. */
  16. protected $organism = [
  17. 'common_name' => 'F. excelsior miniature',
  18. 'genus' => 'Fraxinus',
  19. 'species' => 'excelsior',
  20. 'abbreviation' => 'F. excelsor',
  21. 'comment' => 'Loaded with TripalDev Seed.',
  22. ];
  23. protected $sequence_analysis = [
  24. 'name' => 'Fraxinus exclesior miniature dataset',
  25. 'description' => 'Tripal Dev Seed',
  26. ];
  27. protected $expression_analysis = [
  28. 'name' => 'Fraxinus exclesior miniature dataset Expression Analysis',
  29. 'description' => 'Tripal Dev Seed',
  30. ];
  31. protected $blastdb = [
  32. 'name' => 'DevSeed Database: TREMBL',
  33. 'description' => 'A dummy database created by DevSeed',
  34. ];
  35. /**
  36. * Part 2:
  37. * Files.
  38. * Each importer will take a file argument. This argument should be an array
  39. * with one of the following two keys: file_remote => url where the file is
  40. * located file_local => server path where the file is located.
  41. */
  42. protected $landmark_file = ['file_remote' => 'https://raw.githubusercontent.com/statonlab/tripal_dev_seed/master/Fexcel_mini/sequences/empty_landmarks.fasta'];
  43. protected $landmark_type = 'supercontig';
  44. protected $mRNA_file = ['file_remote' => 'https://raw.githubusercontent.com/statonlab/tripal_dev_seed/master/Fexcel_mini/sequences/mrna_mini.fasta'];
  45. protected $protein_file = ['file_remote' => 'https://raw.githubusercontent.com/statonlab/tripal_dev_seed/master/Fexcel_mini/sequences/polypeptide_mini.fasta'];
  46. protected $gff_file = ['file_remote' => 'https://raw.githubusercontent.com/statonlab/tripal_dev_seed/master/Fexcel_mini/gff/filtered.gff'];
  47. protected $blast_file = ['file_remote' => 'https://raw.githubusercontent.com/statonlab/tripal_dev_seed/master/Fexcel_mini/gff/filtered.gff'];
  48. protected $biomaterial_file = ['file_remote' => 'https://raw.githubusercontent.com/statonlab/tripal_dev_seed/master/Fexcel_mini/biomaterials/biomaterials.xml'];
  49. protected $expression_file = ['file_remote' => 'https://raw.githubusercontent.com/statonlab/tripal_dev_seed/master/Fexcel_mini/expression/expression.tsv'];
  50. protected $interpro_file = ['file_remote' => 'https://raw.githubusercontent.com/statonlab/tripal_dev_seed/master/Fexcel_mini/ips/polypeptide_mini.fasta.xml'];
  51. // Regular expression that will link the protein name to the mRNA parent feature name.
  52. // protected $prot_regexp = '/(FRA.*?)(?=:)/';
  53. protected $prot_regexp = null;
  54. public function __construct()
  55. {
  56. if ($this->organism) {
  57. try {
  58. $organism = $this->fetch_chado_record('chado.organism', [
  59. 'common_name',
  60. 'organism_id',
  61. ], $this->organism);
  62. } catch (\Exception $e) {
  63. echo $e->getMessage();
  64. exit;
  65. }
  66. $this->organism = $organism;
  67. if ($this->sequence_analysis) {
  68. try {
  69. $seq_analysis = $this->fetch_chado_record('chado.analysis', ['analysis_id'],
  70. $this->sequence_analysis);
  71. } catch (\Exception $e) {
  72. echo $e->getMessage();
  73. exit;
  74. }
  75. $this->sequence_analysis = $seq_analysis;
  76. }
  77. if ($this->expression_analysis) {
  78. try {
  79. $expression_analysis = $this->fetch_chado_record('chado.analysis', ['analysis_id'],
  80. $this->expression_analysis);
  81. } catch (\Exception $e) {
  82. echo $e->getMessage();
  83. exit;
  84. }
  85. $this->expression_analysis = $expression_analysis;
  86. }
  87. }
  88. if ($this->blastdb) {
  89. try {
  90. $blastdb = $this->fetch_chado_record('chado.db', ['db_id'], $this->blastdb);
  91. } catch (\Excetion $e) {
  92. echo $e->getMessage();
  93. }
  94. $this->blastdb = $blastdb;
  95. }
  96. }
  97. /**
  98. * Runs all loaders.
  99. * Will only run loaders where the files have been uncommented at the start
  100. * of the class.
  101. */
  102. public function up()
  103. {
  104. if ($this->landmark_file) {
  105. $run_args = [
  106. 'organism_id' => $this->organism->organism_id,
  107. 'analysis_id' => $this->sequence_analysis->analysis_id,
  108. 'seqtype' => $this->landmark_type,
  109. 'method' => 2, //default insert and update
  110. 'match_type' => 1, //unique name default
  111. //optional
  112. 're_name' => null,
  113. 're_uname' => null,
  114. 're_accession' => null,
  115. 'db_id' => null,
  116. 'rel_type' => null,
  117. 're_subject' => null,
  118. 'parent_type' => null,
  119. ];
  120. $this->load_landmarks($run_args, $this->landmark_file);
  121. }
  122. if ($this->gff_file) {
  123. $run_args = [
  124. 'analysis_id' => $this->sequence_analysis->analysis_id,
  125. 'organism_id' => $this->organism->organism_id,
  126. 'use_transaction' => 1,
  127. 'add_only' => 0,
  128. 'update' => 1,
  129. 'create_organism' => 0,
  130. 'create_target' => 0,
  131. ///regexps for mRNA and protein.
  132. 're_mrna' => null,
  133. 're_protein' => $this->prot_regexp,
  134. //optional
  135. 'target_organism_id' => null,
  136. 'target_type' => null,
  137. 'start_line' => null,
  138. 'landmark_type' => null,
  139. 'alt_id_attr' => null,
  140. ];
  141. $this->load_GFF($run_args, $this->gff_file);
  142. }
  143. if ($this->mRNA_file) {
  144. $run_args = [
  145. 'organism_id' => $this->organism->organism_id,
  146. 'analysis_id' => $this->sequence_analysis->analysis_id,
  147. 'seqtype' => 'mRNA',
  148. 'method' => 2, //default insert and update
  149. 'match_type' => 1, //unique name default
  150. //optional
  151. 're_name' => null,
  152. 're_uname' => null,
  153. 're_accession' => null,
  154. 'db_id' => null,
  155. 'rel_type' => null,
  156. 're_subject' => null,
  157. 'parent_type' => null,
  158. ];
  159. $this->load_mRNA_FASTA($run_args, $this->mRNA_file);
  160. }
  161. if ($this->protein_file) {
  162. $run_args = [
  163. 'organism_id' => $this->organism->organism_id,
  164. 'analysis_id' => $this->sequence_analysis->analysis_id,
  165. 'seqtype' => 'polypeptide',
  166. 'method' => 2,
  167. 'match_type' => 1,
  168. //optional
  169. 're_name' => null,
  170. 're_uname' => null,
  171. 're_accession' => null,
  172. 'db_id' => null,
  173. ];
  174. if ($this->prot_regexp) {
  175. //links polypeptide to mRNA
  176. $run_args['rel_type'] = 'derives_from';
  177. $run_args['re_subject'] = $this->prot_regexp;
  178. $run_args['parent_type'] = 'mRNA';
  179. }
  180. $this->load_polypeptide_FASTA($run_args, $this->protein_file);
  181. }
  182. if ($this->interpro_file) {
  183. $run_args = [
  184. 'analysis_id' => $this->sequence_analysis->analysis_id,
  185. //optional
  186. 'query_type' => 'mRNA',
  187. 'query_re' => $this->prot_regexp,
  188. 'query_uniquename' => null,
  189. 'parsego' => true,
  190. ];
  191. $this->load_interpro_annotations($run_args, $this->interpro_file);
  192. }
  193. if ($this->blast_file) {
  194. $run_args = [
  195. 'analysis_id' => $this->sequence_analysis->analysis_id,
  196. 'no_parsed' => 25,//number results to parse
  197. 'query_type' => 'mRNA',
  198. //optional
  199. 'blastdb' => $this->blastdb->db_id,
  200. 'blastfile_ext' => null,
  201. 'is_concat' => 0,
  202. 'query_re' => null,
  203. 'query_uniquename' => 0,
  204. ];
  205. $this->load_blast_annotations($run_args, $this->blast_file);
  206. }
  207. if ($this->biomaterial_file) {
  208. $run_args = [
  209. 'organism_id' => $this->organism->organism_id,
  210. 'analysis_id' => $this->sequence_analysis->analysis_id,
  211. ];
  212. //optional: specifies specific CVterms for properties/property values. Not used here.
  213. //'cvterm_configuration' => NULL,
  214. //'cvalue_configuration' => NULL];
  215. $this->load_biomaterials($run_args, $this->biomaterial_file);
  216. }
  217. if ($this->expression_file) {
  218. $run_args = [
  219. 'filetype' => 'mat', //matrix file type
  220. 'organism_id' => $this->organism->organism_id,
  221. 'analysis_id' => $this->sequence_analysis->analysis_id,
  222. //optional
  223. 'fileext' => null,
  224. 'feature_uniquenames' => 'uniq',
  225. 're_start' => null,
  226. 're_stop' => null,
  227. 'feature_uniquenames' => null,
  228. 'quantificationunits' => null,
  229. ];
  230. $this->load_expression($run_args, $this->expression_file);
  231. }
  232. }
  233. private function load_landmarks($run_args, $file)
  234. {
  235. module_load_include('inc', 'tripal_chado', 'includes/TripalImporter/FASTAImporter');
  236. $importer = new \FASTAImporter();
  237. $importer->create($run_args, $file);
  238. $importer->prepareFiles();
  239. $importer->run();
  240. }
  241. private function load_mRNA_FASTA($run_args, $file)
  242. {
  243. module_load_include('inc', 'tripal_chado', 'includes/TripalImporter/FASTAImporter');
  244. $importer = new \FASTAImporter();
  245. $importer->create($run_args, $file);
  246. $importer->prepareFiles();
  247. $importer->run();
  248. }
  249. private function load_polypeptide_FASTA($run_args, $file)
  250. {
  251. module_load_include('inc', 'tripal_chado', 'includes/TripalImporter/FASTAImporter');
  252. $importer = new \FASTAImporter();
  253. $importer->create($run_args, $file);
  254. $importer->prepareFiles();
  255. $importer->run();
  256. }
  257. private function load_interpro_annotations($run_args, $file)
  258. {
  259. module_load_include('inc', 'tripal_analysis_interpro', 'includes/TripalImporter/InterProImporter');
  260. $importer = new \InterProImporter();
  261. $importer->create($run_args, $file);
  262. $importer->prepareFiles();
  263. $importer->run();
  264. }
  265. private function load_GFF($run_args, $file)
  266. {
  267. module_load_include('inc', 'tripal_chado', 'includes/TripalImporter/GFF3Importer');
  268. $importer = new \GFF3Importer();
  269. $importer->create($run_args, $file);
  270. $importer->prepareFiles();
  271. $importer->run();
  272. }
  273. private function load_blast_annotations($run_args, $file)
  274. {
  275. module_load_include('inc', 'tripal_analysis_blast', 'includes/TripalImporter/BlastImporter');
  276. $importer = new \BlastImporter();
  277. $importer->create($run_args, $file);
  278. $importer->prepareFiles();
  279. $importer->run();
  280. }
  281. private function load_biomaterials($run_args, $file)
  282. {
  283. module_load_include('inc', 'tripal_biomaterial', 'includes/TripalImporter/tripal_biomaterial_loader_v3');
  284. $importer = new \tripal_biomaterial_loader_v3();
  285. $importer->create($run_args, $file);
  286. $importer->prepareFiles();
  287. $importer->run();
  288. }
  289. private function load_expression($run_args, $file)
  290. {
  291. module_load_include('inc', 'tripal_analysis_expression',
  292. 'includes/TripalImporter/tripal_expression_data_loader');
  293. $importer = new \tripal_expression_data_loader();
  294. $importer->create($run_args, $file);
  295. $importer->prepareFiles();
  296. $importer->run();
  297. }
  298. private function fetch_chado_record($table, $fields, $factory_array)
  299. {
  300. $query = db_select($table, 't')->fields('t', $fields);
  301. foreach ($factory_array as $key => $value) {
  302. $query->condition($key, $value);
  303. }
  304. $count_query = $query;
  305. $count = (int) $count_query->countQuery()->execute()->fetchField();
  306. if ($count === 0) {
  307. return factory($table)->create($factory_array);
  308. }
  309. if ($count === 1) {
  310. return $query->execute()->fetchObject();
  311. }
  312. throw new Exception("Error creating object for: ".$table.".\n Array supplied matches ".$count_query." results, must match 1.");
  313. }
  314. }