<?php

namespace Tests\DatabaseSeeders;

use StatonLab\TripalTestSuite\Database\Seeder;
use Exception;

/**
 * Seeds a Tripal/Chado database with the miniature Fraxinus excelsior
 * developer dataset.
 *
 * The constructor resolves (or creates) the Chado records configured in
 * "Part one" below, and up() runs one Tripal importer per file configured in
 * "Part 2". Setting a record or file property to an empty value skips the
 * corresponding step.
 */
class DevSeedSeeder extends Seeder
{
    /**
     * Part one:
     * Chado records.
     * These are records used by many of the below importers.
     * ALL importers require an organism.
     * The expression data loader will associate the data with the
     * $expression_analysis record. All other importers associate data with the
     * $sequence_analysis. Uncomment the array to create that chado record.
     *
     * NOTE(review): "excelsor" / "exclesior" below look like misspellings of
     * "excelsior". They are used as lookup values against the database, so
     * they are left untouched here; confirm before correcting them.
     */
    protected $organism = [
        'common_name' => 'F. excelsior miniature',
        'genus' => 'Fraxinus',
        'species' => 'excelsior',
        'abbreviation' => 'F. excelsor',
        'comment' => 'Loaded with TripalDev Seed.',
    ];

    protected $sequence_analysis = [
        'name' => 'Fraxinus exclesior miniature dataset',
        'description' => 'Tripal Dev Seed',
    ];

    protected $expression_analysis = [
        'name' => 'Fraxinus exclesior miniature dataset Expression Analysis',
        'description' => 'Tripal Dev Seed',
    ];

    protected $blastdb = [
        'name' => 'DevSeed Database: TREMBL',
        'description' => 'A dummy database created by DevSeed',
    ];

    /**
     * Part 2:
     * Files.
     * Each importer will take a file argument.  This argument should be an array
     * with one of the following two keys:
     *   file_remote => url where the file is located
     *   file_local  => server path where the file is located.
     */
    protected $landmark_file = ['file_remote' => 'https://raw.githubusercontent.com/statonlab/tripal_dev_seed/master/Fexcel_mini/sequences/empty_landmarks.fasta'];

    protected $landmark_type = 'supercontig';

    protected $mRNA_file = ['file_remote' => 'https://raw.githubusercontent.com/statonlab/tripal_dev_seed/master/Fexcel_mini/sequences/mrna_mini.fasta'];

    protected $protein_file = ['file_remote' => 'https://raw.githubusercontent.com/statonlab/tripal_dev_seed/master/Fexcel_mini/sequences/polypeptide_mini.fasta'];

    protected $gff_file = ['file_remote' => 'https://raw.githubusercontent.com/statonlab/tripal_dev_seed/master/Fexcel_mini/gff/filtered.gff'];

    // NOTE(review): this is the same URL as $gff_file, which looks like a
    // copy/paste slip -- confirm the intended BLAST results file.
    protected $blast_file = ['file_remote' => 'https://raw.githubusercontent.com/statonlab/tripal_dev_seed/master/Fexcel_mini/gff/filtered.gff'];

    protected $biomaterial_file = ['file_remote' => 'https://raw.githubusercontent.com/statonlab/tripal_dev_seed/master/Fexcel_mini/biomaterials/biomaterials.xml'];

    protected $expression_file = ['file_remote' => 'https://raw.githubusercontent.com/statonlab/tripal_dev_seed/master/Fexcel_mini/expression/expression.tsv'];

    protected $interpro_file = ['file_remote' => 'https://raw.githubusercontent.com/statonlab/tripal_dev_seed/master/Fexcel_mini/ips/polypeptide_mini.fasta.xml'];

    // Regular expression that will link the protein name to the mRNA parent feature name.
    // protected $prot_regexp = '/(FRA.*?)(?=:)/';
    protected $prot_regexp = null;

    /**
     * Replaces each configured record array with the matching database row.
     *
     * Every lookup goes through fetch_or_exit(), so a failed or ambiguous
     * lookup prints the error message and terminates the script. The two
     * analysis records are only resolved when an organism is configured; the
     * blast database is resolved independently.
     */
    public function __construct()
    {
        if ($this->organism) {
            $this->organism = $this->fetch_or_exit(
                'chado.organism',
                ['common_name', 'organism_id'],
                $this->organism
            );

            if ($this->sequence_analysis) {
                $this->sequence_analysis = $this->fetch_or_exit(
                    'chado.analysis',
                    ['analysis_id'],
                    $this->sequence_analysis
                );
            }

            if ($this->expression_analysis) {
                $this->expression_analysis = $this->fetch_or_exit(
                    'chado.analysis',
                    ['analysis_id'],
                    $this->expression_analysis
                );
            }
        }

        if ($this->blastdb) {
            // Previously guarded by `catch (\Excetion $e)`, a misspelt class
            // name that never matched and had no `exit`. It now fails the
            // same way as the other lookups.
            $this->blastdb = $this->fetch_or_exit('chado.db', ['db_id'], $this->blastdb);
        }
    }

    /**
     * Runs all loaders.
     * Will only run loaders where the files have been uncommented at the start
     * of the class.
     */
    public function up()
    {
        if ($this->landmark_file) {
            $run_args = [
                'organism_id' => $this->organism->organism_id,
                'analysis_id' => $this->sequence_analysis->analysis_id,
                'seqtype' => $this->landmark_type,
                'method' => 2, //default insert and update
                'match_type' => 1, //unique name default
                //optional
                're_name' => null,
                're_uname' => null,
                're_accession' => null,
                'db_id' => null,
                'rel_type' => null,
                're_subject' => null,
                'parent_type' => null,
            ];
            $this->load_landmarks($run_args, $this->landmark_file);
        }

        if ($this->gff_file) {
            $run_args = [
                'analysis_id' => $this->sequence_analysis->analysis_id,
                'organism_id' => $this->organism->organism_id,
                'use_transaction' => 1,
                'add_only' => 0,
                'update' => 1,
                'create_organism' => 0,
                'create_target' => 0,
                //regexps for mRNA and protein.
                're_mrna' => null,
                're_protein' => $this->prot_regexp,
                //optional
                'target_organism_id' => null,
                'target_type' => null,
                'start_line' => null,
                'landmark_type' => null,
                'alt_id_attr' => null,
            ];
            $this->load_GFF($run_args, $this->gff_file);
        }

        if ($this->mRNA_file) {
            $run_args = [
                'organism_id' => $this->organism->organism_id,
                'analysis_id' => $this->sequence_analysis->analysis_id,
                'seqtype' => 'mRNA',
                'method' => 2, //default insert and update
                'match_type' => 1, //unique name default
                //optional
                're_name' => null,
                're_uname' => null,
                're_accession' => null,
                'db_id' => null,
                'rel_type' => null,
                're_subject' => null,
                'parent_type' => null,
            ];
            $this->load_mRNA_FASTA($run_args, $this->mRNA_file);
        }

        if ($this->protein_file) {
            $run_args = [
                'organism_id' => $this->organism->organism_id,
                'analysis_id' => $this->sequence_analysis->analysis_id,
                'seqtype' => 'polypeptide',
                'method' => 2,
                'match_type' => 1,
                //optional
                're_name' => null,
                're_uname' => null,
                're_accession' => null,
                'db_id' => null,
            ];
            if ($this->prot_regexp) {
                //links polypeptide to mRNA
                $run_args['rel_type'] = 'derives_from';
                $run_args['re_subject'] = $this->prot_regexp;
                $run_args['parent_type'] = 'mRNA';
            }
            $this->load_polypeptide_FASTA($run_args, $this->protein_file);
        }

        if ($this->interpro_file) {
            $run_args = [
                'analysis_id' => $this->sequence_analysis->analysis_id,
                //optional
                'query_type' => 'mRNA',
                'query_re' => $this->prot_regexp,
                'query_uniquename' => null,
                'parsego' => true,
            ];
            $this->load_interpro_annotations($run_args, $this->interpro_file);
        }

        if ($this->blast_file) {
            $run_args = [
                'analysis_id' => $this->sequence_analysis->analysis_id,
                'no_parsed' => 25, //number results to parse
                'query_type' => 'mRNA',
                //optional
                'blastdb' => $this->blastdb->db_id,
                'blastfile_ext' => null,
                'is_concat' => 0,
                'query_re' => null,
                'query_uniquename' => 0,
            ];
            $this->load_blast_annotations($run_args, $this->blast_file);
        }

        if ($this->biomaterial_file) {
            $run_args = [
                'organism_id' => $this->organism->organism_id,
                'analysis_id' => $this->sequence_analysis->analysis_id,
            ];
            //optional: specifies specific CVterms for properties/property values.  Not used here.
            //'cvterm_configuration' => NULL,
            //'cvalue_configuration' => NULL];
            $this->load_biomaterials($run_args, $this->biomaterial_file);
        }

        if ($this->expression_file) {
            // Expression data belongs to the expression analysis (see the
            // "Part one" notes). Fall back to the sequence analysis only when
            // no expression analysis has been configured.
            $expression_analysis = $this->expression_analysis ?: $this->sequence_analysis;

            $run_args = [
                'filetype' => 'mat', //matrix file type
                'organism_id' => $this->organism->organism_id,
                'analysis_id' => $expression_analysis->analysis_id,
                //optional
                'fileext' => null,
                're_start' => null,
                're_stop' => null,
                // This key used to be listed twice ('uniq', then null). PHP
                // keeps the last value, so null is what was actually passed.
                'feature_uniquenames' => null,
                'quantificationunits' => null,
            ];
            $this->load_expression($run_args, $this->expression_file);
        }
    }

    /**
     * Loads the landmark sequences with the FASTA importer.
     *
     * @param array $run_args Importer arguments.
     * @param array $file     File argument (file_remote or file_local).
     */
    private function load_landmarks($run_args, $file)
    {
        $this->run_importer('tripal_chado', 'includes/TripalImporter/FASTAImporter', 'FASTAImporter', $run_args, $file);
    }

    /**
     * Loads the mRNA sequences with the FASTA importer.
     *
     * @param array $run_args Importer arguments.
     * @param array $file     File argument (file_remote or file_local).
     */
    private function load_mRNA_FASTA($run_args, $file)
    {
        $this->run_importer('tripal_chado', 'includes/TripalImporter/FASTAImporter', 'FASTAImporter', $run_args, $file);
    }

    /**
     * Loads the polypeptide sequences with the FASTA importer.
     *
     * @param array $run_args Importer arguments.
     * @param array $file     File argument (file_remote or file_local).
     */
    private function load_polypeptide_FASTA($run_args, $file)
    {
        $this->run_importer('tripal_chado', 'includes/TripalImporter/FASTAImporter', 'FASTAImporter', $run_args, $file);
    }

    /**
     * Loads InterProScan annotations with the InterPro importer.
     *
     * @param array $run_args Importer arguments.
     * @param array $file     File argument (file_remote or file_local).
     */
    private function load_interpro_annotations($run_args, $file)
    {
        $this->run_importer(
            'tripal_analysis_interpro',
            'includes/TripalImporter/InterProImporter',
            'InterProImporter',
            $run_args,
            $file
        );
    }

    /**
     * Loads the GFF file with the GFF3 importer.
     *
     * @param array $run_args Importer arguments.
     * @param array $file     File argument (file_remote or file_local).
     */
    private function load_GFF($run_args, $file)
    {
        $this->run_importer('tripal_chado', 'includes/TripalImporter/GFF3Importer', 'GFF3Importer', $run_args, $file);
    }

    /**
     * Loads BLAST annotations with the Blast importer.
     *
     * @param array $run_args Importer arguments.
     * @param array $file     File argument (file_remote or file_local).
     */
    private function load_blast_annotations($run_args, $file)
    {
        $this->run_importer(
            'tripal_analysis_blast',
            'includes/TripalImporter/BlastImporter',
            'BlastImporter',
            $run_args,
            $file
        );
    }

    /**
     * Loads biomaterials with the biomaterial loader.
     *
     * @param array $run_args Importer arguments.
     * @param array $file     File argument (file_remote or file_local).
     */
    private function load_biomaterials($run_args, $file)
    {
        $this->run_importer(
            'tripal_biomaterial',
            'includes/TripalImporter/tripal_biomaterial_loader_v3',
            'tripal_biomaterial_loader_v3',
            $run_args,
            $file
        );
    }

    /**
     * Loads expression data with the expression data loader.
     *
     * @param array $run_args Importer arguments.
     * @param array $file     File argument (file_remote or file_local).
     */
    private function load_expression($run_args, $file)
    {
        $this->run_importer(
            'tripal_analysis_expression',
            'includes/TripalImporter/tripal_expression_data_loader',
            'tripal_expression_data_loader',
            $run_args,
            $file
        );
    }

    /**
     * Includes an importer class file, then creates, prepares and runs it.
     *
     * Shared by every load_*() method; they differ only in which module,
     * include path and class they name.
     *
     * @param string $module   Module that ships the importer.
     * @param string $path     Include path inside the module, without the ".inc" extension.
     * @param string $class    Importer class name in the global namespace.
     * @param array  $run_args Importer arguments.
     * @param array  $file     File argument (file_remote or file_local).
     *
     * @throws Exception When the importer class is not available after the include.
     */
    private function run_importer($module, $path, $class, $run_args, $file)
    {
        module_load_include('inc', $module, $path);

        // Report a missing module clearly instead of hitting a fatal
        // "class not found" error on instantiation.
        if (! class_exists($class)) {
            throw new Exception("Importer class ".$class." could not be loaded from module ".$module.".");
        }

        // Class names held in a string are always resolved from the global
        // namespace, matching the original `new \ClassName()` calls.
        $importer = new $class();
        $importer->create($run_args, $file);
        $importer->prepareFiles();
        $importer->run();
    }

    /**
     * Runs fetch_chado_record() and terminates the script if it throws.
     *
     * @param string $table  Table to query, e.g. 'chado.organism'.
     * @param array  $fields Columns to select.
     * @param array  $values Column => value pairs identifying the record.
     *
     * @return object The resolved record.
     */
    private function fetch_or_exit($table, $fields, $values)
    {
        try {
            return $this->fetch_chado_record($table, $fields, $values);
        } catch (Exception $e) {
            echo $e->getMessage();
            exit;
        }
    }

    /**
     * Finds the single record matching the given values, creating it when
     * there is none.
     *
     * @param string $table         Table to query, e.g. 'chado.organism'.
     * @param array  $fields        Columns to select when the record exists.
     * @param array  $factory_array Column => value pairs used both as the
     *                              lookup conditions and as the factory
     *                              attributes.
     *
     * @return object The selected row, or the object returned by the factory.
     *
     * @throws Exception When more than one record matches.
     */
    private function fetch_chado_record($table, $fields, $factory_array)
    {
        $query = db_select($table, 't')->fields('t', $fields);
        foreach ($factory_array as $key => $value) {
            $query->condition($key, $value);
        }

        $count = (int) $query->countQuery()->execute()->fetchField();

        if ($count === 0) {
            return factory($table)->create($factory_array);
        }

        if ($count === 1) {
            return $query->execute()->fetchObject();
        }

        // Report the number of matches; the original concatenated the query
        // object here instead of the count.
        throw new Exception("Error creating object for: ".$table.".\n Array supplied matches ".$count." results, must match 1.");
    }
}