12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929 |
- <?php
- /**
- * @file
- * @todo Add file header description
- */
- /**
- * @defgroup fasta_loader FASTA Feature Loader
- * @ingroup tripal_feature
- * @{
- * Provides fasta loading functionality. Creates features based on their specification in a fasta file.
- * @}
- *
- */
/**
 * Form builder for the FASTA feature loader.
 *
 * Collects the FASTA file path, the organism, the sequence type, the import
 * method, the name matching mode and the analysis, plus the optional advanced
 * settings: regular expressions for the name and unique name, an external
 * database cross reference and a relationship to a parent feature.
 *
 * The organism, analysis and database select lists are populated from the
 * Chado organism, analysis and db tables each time the form is built.
 *
 * @return
 *   The Drupal form array.
 *
 * @ingroup fasta_loader
 */
function tripal_feature_fasta_load_form() {

  $form['fasta_file'] = array(
    '#type' => 'textfield',
    '#title' => t('FASTA File'),
    '#description' => t('Please enter the full system path for the FASTA file, or a path within the Drupal
                         installation (e.g. /sites/default/files/xyz.fasta). The path must be accessible to the
                         server on which this Drupal instance is running.'),
    '#required' => TRUE,
  );

  // Build the organism list. The leading empty option forces an explicit
  // choice because the element is required.
  $organisms = array('' => '');
  $org_rset = chado_query("SELECT * FROM {organism} ORDER BY genus, species");
  while ($organism = db_fetch_object($org_rset)) {
    $organisms[$organism->organism_id] = "$organism->genus $organism->species ($organism->common_name)";
  }
  $form['organism_id'] = array(
    '#title' => t('Organism'),
    // Element types are machine names and must never be passed through t().
    '#type' => 'select',
    '#description' => t("Choose the organism to which these sequences are associated"),
    '#required' => TRUE,
    '#options' => $organisms,
  );

  $form['seqtype'] = array(
    '#type' => 'textfield',
    '#title' => t('Sequence Type'),
    '#required' => TRUE,
    '#description' => t('Please enter the Sequence Ontology (SO) term name that describes the sequences in the FASTA file (e.g. gene, mRNA, protein, etc...)'),
  );

  // NOTE: there is currently no library selector on this form, so no
  // 'library_id' value is submitted with it.

  // The option keys (0, 1, 2) are what is submitted; the validate and submit
  // handlers translate them back into the labels used by the loader.
  $form['method'] = array(
    '#type' => 'radios',
    '#title' => t('Method'),
    '#required' => TRUE,
    '#options' => array(
      t('Insert only'),
      t('Update only'),
      t('Insert and update'),
    ),
    '#description' => t('Select how features in the FASTA file are handled.
       Select "Insert only" to insert the new features. If a feature already
       exists with the same name or unique name and type then it is skipped.
       Select "Update only" to only update features that already exist in the
       database.  Select "Insert and Update" to insert features that do
       not exist and update those that do.'),
    '#default_value' => 2,
  );

  $form['match_type'] = array(
    '#type' => 'radios',
    '#title' => t('Name Match Type'),
    '#required' => TRUE,
    '#options' => array(
      t('Name'),
      t('Unique name'),
    ),
    '#description' => t('Used for "updates only" or "insert and update" methods. Not required if method type is "insert".
      Feature data is stored in Chado with both a human-readable
      name and a unique name. If the features in your FASTA file are uniquely identified using
      a human-readable name then select the "Name" button. If your features are
      uniquely identified using the unique name then select the "Unique name" button.  If you
      loaded your features first using the GFF loader then the unique name of each
      features were indicated by the "ID=" attribute and the name by the "Name=" attribute.
      By default, the FASTA loader will use the first word (character string
      before the first space) as  the name for your feature. If
      this does not uniquely identify your feature consider specifying a regular expression in the advanced section below.
      Additionally, you may import both a name and a unique name for each sequence using the advanced options.'),
    '#default_value' => 1,
  );

  $form['analysis'] = array(
    '#type' => 'fieldset',
    '#title' => t('Analysis Used to Derive Features'),
    '#collapsed' => TRUE
  );
  $form['analysis']['desc'] = array(
    '#type' => 'markup',
    '#value' => t("Why specify an analysis for a data load?  All data comes
       from some place, even if downloaded from Genbank. By specifying
       analysis details for all data uploads, it allows an end user to reproduce the
       data set, or at least indicates the source of the data."),
  );

  // Build the analysis list.
  $analyses = array('' => '');
  $analysis_rset = chado_query("SELECT * FROM {analysis} ORDER BY name");
  while ($analysis = db_fetch_object($analysis_rset)) {
    $analyses[$analysis->analysis_id] = "$analysis->name ($analysis->program $analysis->programversion, $analysis->sourcename)";
  }
  $form['analysis']['analysis_id'] = array(
    '#title' => t('Analysis'),
    '#type' => 'select',
    '#description' => t("Choose the analysis to which these features are associated"),
    '#required' => TRUE,
    '#options' => $analyses,
  );

  // Advanced options.
  $form['advanced'] = array(
    '#type' => 'fieldset',
    '#title' => t('Advanced Options'),
    '#collapsible' => TRUE,
    '#collapsed' => TRUE
  );
  $form['advanced']['re_help'] = array(
    '#type' => 'item',
    '#value' => t('A regular expression is an advanced method for extracting information from a string of text.
                   Your FASTA file may contain both a human-readable name and a unique name for each sequence.
                   If you want to import
                   both the name and unique name for all sequences, then you must provide regular expressions
                   so that the loader knows how to separate them.
                   Otherwise the name and uniquename will be the same.
                   By default, this loader will use the first word in the definition
                   lines of the FASTA file
                   as the name or unique name of the feature.'),
  );
  $form['advanced']['re_name'] = array(
    '#type' => 'textfield',
    '#title' => t('Regular expression for the name'),
    '#required' => FALSE,
    '#description' => t('Enter the regular expression that will extract the
       feature name from the FASTA definition line. For example, for a
       definition line with a name and unique name separated by a bar \'|\' (>seqname|uniquename),
       the regular expression for the name would be, "^(.*?)\|.*$".'),
  );
  $form['advanced']['re_uname'] = array(
    '#type' => 'textfield',
    '#title' => t('Regular expression for the unique name'),
    '#required' => FALSE,
    '#description' => t('Enter the regular expression that will extract the
       unique feature name from the FASTA definition line. For example, for a
       definition line with a name and unique name separated by a bar \'|\' (>seqname|uniquename),
       the regular expression for the unique name would be "^.*?\|(.*)$".'),
  );

  // Advanced database cross-reference options.
  $form['advanced']['db'] = array(
    '#type' => 'fieldset',
    '#title' => t('External Database Reference'),
    '#weight' => 6,
    '#collapsed' => TRUE
  );
  $form['advanced']['db']['re_accession'] = array(
    '#type' => 'textfield',
    '#title' => t('Regular expression for the accession'),
    '#required' => FALSE,
    '#description' => t('Enter the regular expression that will extract the accession for the external database for each feature from the FASTA definition line.'),
    '#weight' => 2
  );

  // Build the list of external databases.
  $dbs = array('' => '');
  $db_rset = chado_query("SELECT * FROM {db} ORDER BY name");
  while ($db = db_fetch_object($db_rset)) {
    $dbs[$db->db_id] = "$db->name";
  }
  $form['advanced']['db']['db_id'] = array(
    '#title' => t('External Database'),
    '#type' => 'select',
    '#description' => t("Please choose an external database for which these sequences have a cross reference."),
    '#required' => FALSE,
    '#options' => $dbs,
    '#weight' => 1,
  );

  // Advanced relationship options.
  $form['advanced']['relationship'] = array(
    '#type' => 'fieldset',
    '#title' => t('Relationships'),
    '#weight' => 6,
    '#collapsed' => TRUE
  );
  // Keys are the relationship term names looked up by the loader; values are
  // the labels shown to the user.
  $rels = array(
    '' => '',
    'part_of' => 'part of',
    'derives_from' => 'produced by',
  );
  $form['advanced']['relationship']['rel_type'] = array(
    '#title' => t('Relationship Type'),
    '#type' => 'select',
    '#description' => t("Use this option to create associations, or relationships between the
                         features of this FASTA file and existing features in the database. For
                         example, to associate a FASTA file of peptides to existing genes or transcript sequence,
                         select the type 'produced by'. For a CDS sequences select the type 'part of'"),
    '#required' => FALSE,
    '#options' => $rels,
    '#weight' => 5,
  );
  $form['advanced']['relationship']['re_subject'] = array(
    '#type' => 'textfield',
    '#title' => t('Regular expression for the parent'),
    '#required' => FALSE,
    '#description' => t('Enter the regular expression that will extract the unique
                         name needed to identify the existing sequence for which the
                         relationship type selected above will apply.'),
    '#weight' => 6
  );
  $form['advanced']['relationship']['parent_type'] = array(
    '#type' => 'textfield',
    '#title' => t('Parent Type'),
    '#required' => FALSE,
    '#description' => t('Please enter the Sequence Ontology term for the parent.  For example
                         if the FASTA file being loaded is a set of proteins that are
                         products of genes, then use the SO term \'gene\' or \'transcript\' or equivalent. However,
                         this type must match the type for already loaded features.'),
    '#weight' => 7
  );

  $form['button'] = array(
    '#type' => 'submit',
    '#value' => t('Import FASTA file'),
    '#weight' => 10,
  );
  return $form;
}
/**
 * Validation handler for tripal_feature_fasta_load_form().
 *
 * Checks that:
 * - the regular expression matching the selected name match type is present
 *   whenever only the other one was supplied,
 * - the FASTA file exists, either relative to the Drupal installation or at
 *   the path exactly as entered,
 * - the relationship settings and the database cross reference settings are
 *   each either completely filled in or completely empty,
 * - the sequence type (and the parent type, when a relationship is used) is
 *   a term or synonym of the 'sequence' vocabulary.
 *
 * The resolved file path is saved in $form_state['storage']['dfile'] where
 * the submit handler reads it.
 *
 * @param $form
 *   The form array.
 * @param $form_state
 *   The form state; 'values' is read and 'storage' is written.
 *
 * @ingroup fasta_loader
 */
function tripal_feature_fasta_load_form_validate($form, &$form_state) {
  $values = $form_state['values'];

  $fasta_file   = trim($values['fasta_file']);
  $type         = trim($values['seqtype']);
  $match_type   = trim($values['match_type']);
  $re_name      = trim($values['re_name']);
  $re_uname     = trim($values['re_uname']);
  $re_accession = trim($values['re_accession']);
  $db_id        = $values['db_id'];
  $rel_type     = $values['rel_type'];
  $re_subject   = trim($values['re_subject']);
  $parent_type  = trim($values['parent_type']);

  // The radios submit the option index; translate it to the label used by
  // the checks below.
  $match_labels = array('Name', 'Unique name');
  if (isset($match_labels[$match_type])) {
    $match_type = $match_labels[$match_type];
  }

  if ($re_name and !$re_uname and strcmp($match_type, 'Unique name') == 0) {
    form_set_error('re_uname', t("You must provide a regular expression to identify the sequence unique name"));
  }
  if (!$re_name and $re_uname and strcmp($match_type, 'Name') == 0) {
    form_set_error('re_name', t("You must provide a regular expression to identify the sequence name"));
  }

  // First look for the file inside the Drupal installation. If it is not
  // there, treat the value as a full system path.
  $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $fasta_file;
  if (!file_exists($dfile)) {
    $dfile = $fasta_file;
  }
  if (!file_exists($dfile)) {
    form_set_error('fasta_file', t("Cannot find the file on the system. Check that the file exists or that the web server has permissions to read the file."));
  }

  // If any relationship setting is given then all three are needed.
  if (($rel_type or $parent_type) and !$re_subject) {
    form_set_error('re_subject', t("Please provide a regular expression for the parent"));
  }
  if (($rel_type or $re_subject) and !$parent_type) {
    form_set_error('parent_type', t("Please provide a SO term for the parent"));
  }
  if (($parent_type or $re_subject) and !$rel_type) {
    form_set_error('rel_type', t("Please select a relationship type"));
  }

  // A database and an accession expression must be supplied together.
  if ($db_id and !$re_accession) {
    form_set_error('re_accession', t("Please provide a regular expression for the accession"));
  }
  if ($re_accession and !$db_id) {
    form_set_error('db_id', t("Please select a database"));
  }

  // Make sure the requested types exist as a term or synonym in the
  // 'sequence' vocabulary.
  $cvtermsql = "SELECT CVT.cvterm_id
                FROM {cvterm} CVT
                  INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
                  LEFT JOIN {cvtermsynonym} CVTS on CVTS.cvterm_id = CVT.cvterm_id
                WHERE cv.name = '%s' and (CVT.name = '%s' or CVTS.synonym = '%s')";
  $cvterm = db_fetch_object(chado_query($cvtermsql, 'sequence', $type, $type));
  if (!$cvterm) {
    // The error is attached to 'seqtype', the name of the form element that
    // holds the sequence type, so that the offending field is highlighted.
    form_set_error('seqtype', t("The Sequence Ontology (SO) term selected for the sequence type is not available in the database. Please check spelling or select another."));
  }
  // An empty parent type has already been reported above, so only look the
  // term up when one was actually entered.
  if ($rel_type and $parent_type) {
    $cvterm = db_fetch_object(chado_query($cvtermsql, 'sequence', $parent_type, $parent_type));
    if (!$cvterm) {
      form_set_error('parent_type', t("The Sequence Ontology (SO) term selected for the parent relationship is not available in the database. Please check spelling or select another."));
    }
  }

  // Hand the resolved path over to the submit handler.
  $form_state['storage']['dfile'] = $dfile;
}
/**
 * Submit handler for tripal_feature_fasta_load_form().
 *
 * Gathers the submitted settings and registers a Tripal job that calls
 * tripal_feature_load_fasta() with them. The file itself is not read here.
 *
 * @param $form
 *   The form array.
 * @param $form_state
 *   The form state. 'values' holds the submitted settings and
 *   'storage'/'dfile' the file path resolved by the validation handler.
 *
 * @ingroup fasta_loader
 */
function tripal_feature_fasta_load_form_submit($form, &$form_state) {
  global $user;

  $values = $form_state['values'];

  $dfile        = $form_state['storage']['dfile'];
  $organism_id  = $values['organism_id'];
  $type         = trim($values['seqtype']);
  $method       = trim($values['method']);
  $match_type   = trim($values['match_type']);
  // The form has no library element, so this value is normally absent. Fall
  // back to NULL instead of reading an undefined index.
  $library_id   = isset($values['library_id']) ? $values['library_id'] : NULL;
  $re_name      = trim($values['re_name']);
  $re_uname     = trim($values['re_uname']);
  $re_accession = trim($values['re_accession']);
  $db_id        = $values['db_id'];
  $rel_type     = $values['rel_type'];
  $re_subject   = trim($values['re_subject']);
  $parent_type  = trim($values['parent_type']);
  $analysis_id  = $values['analysis_id'];

  // The radios submit the option index; the loader expects the labels.
  $method_labels = array('Insert only', 'Update only', 'Insert and update');
  if (isset($method_labels[$method])) {
    $method = $method_labels[$method];
  }
  $match_labels = array('Name', 'Unique name');
  if (isset($match_labels[$match_type])) {
    $match_type = $match_labels[$match_type];
  }

  // The order must match the parameter list of tripal_feature_load_fasta().
  $args = array($dfile, $organism_id, $type, $library_id, $re_name, $re_uname,
    $re_accession, $db_id, $rel_type, $re_subject, $parent_type, $method,
    $user->uid, $analysis_id, $match_type);

  // Only the file name, without its directory, is shown in the job title.
  $fname = basename($dfile);
  tripal_add_job("Import FASTA file: $fname", 'tripal_feature',
    'tripal_feature_load_fasta', $args, $user->uid);
}
/**
 * Extracts the first captured group of a regular expression from a line.
 *
 * The expression is wrapped in "/" delimiters before it is used, so a literal
 * slash inside $regex has to be escaped by whoever supplies it.
 *
 * @param $regex
 *   The regular expression, without delimiters.
 * @param $line
 *   The text to search.
 *
 * @return
 *   The trimmed first captured group, an empty string when the expression
 *   matches but captures nothing, or FALSE when it does not match.
 */
function _tripal_feature_fasta_defline_capture($regex, $line) {
  if (!preg_match("/$regex/", $line, $matches)) {
    return FALSE;
  }
  return isset($matches[1]) ? trim($matches[1]) : '';
}

/**
 * Loads the sequences of a FASTA file as features.
 *
 * The file is read line by line. Every definition line (starting with '>')
 * begins a new record, from which the name, unique name, accession and
 * relationship subject are extracted. The lines that follow are concatenated
 * into the residues. Each completed record is passed to
 * tripal_feature_fasta_loader_handle_feature().
 *
 * @param $dfile
 *   The path of the FASTA file.
 * @param $organism_id
 *   The organism the features belong to.
 * @param $type
 *   The name (or synonym) of the 'sequence' vocabulary term of the features.
 * @param $library_id
 *   Not used by this function.
 * @param $re_name
 *   Regular expression extracting the name from the definition line. When
 *   empty and $match_type is 'Name', the first word is used.
 * @param $re_uname
 *   Regular expression extracting the unique name from the definition line.
 *   When empty and $match_type is 'Unique name', the first word is used.
 * @param $re_accession
 *   Regular expression extracting the external database accession.
 * @param $db_id
 *   The external database for the cross reference.
 * @param $rel_type
 *   The name of the 'relationship' vocabulary term linking to the parent.
 * @param $re_subject
 *   Regular expression extracting the parent's unique name.
 * @param $parent_type
 *   The name (or synonym) of the 'sequence' vocabulary term of the parent.
 * @param $method
 *   'Insert only', 'Update only' or 'Insert and update'.
 * @param $uid
 *   Not used by this function.
 * @param $analysis_id
 *   The analysis the features are associated with.
 * @param $match_type
 *   'Name' or 'Unique name'.
 * @param $job
 *   Optional job identifier; when given, progress is reported for it.
 *
 * @return
 *   0 when a required term cannot be found or the file cannot be opened,
 *   nothing otherwise.
 *
 * @ingroup fasta_loader
 */
function tripal_feature_load_fasta($dfile, $organism_id, $type,
  $library_id, $re_name, $re_uname, $re_accession, $db_id, $rel_type,
  $re_subject, $parent_type, $method, $uid, $analysis_id,
  $match_type, $job = NULL) {

  // begin the transaction
  $connection = tripal_db_start_transaction();

  // if we cannot get a connection then let the user know the loading will be slow
  if (!$connection) {
    print "A persistant connection was not obtained. Loading will be slow\n";
  }
  else {
    print "\nNOTE: Loading of this FASTA file is performed using a database transaction. \n" .
          "If the load fails or is terminated prematurely then the entire set of \n" .
          "insertions/updates is rolled back and will not be found in the database\n\n";
  }

  // Resolve the terms for the feature type, the parent type and the
  // relationship type before touching the file.
  $cvtermsql = "SELECT CVT.cvterm_id
                FROM {cvterm} CVT
                  INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
                  LEFT JOIN {cvtermsynonym} CVTS on CVTS.cvterm_id = CVT.cvterm_id
                WHERE cv.name = '%s' and (CVT.name = '%s' or CVTS.synonym = '%s')";
  $cvterm = db_fetch_object(chado_query($cvtermsql, 'sequence', $type, $type));
  if (!$cvterm) {
    watchdog("T_fasta_loader", "Cannot find the term type: '%type'", array('%type' => $type), WATCHDOG_ERROR);
    return 0;
  }
  $parentcvterm = NULL;
  if ($parent_type) {
    $parentcvterm = db_fetch_object(chado_query($cvtermsql, 'sequence', $parent_type, $parent_type));
    if (!$parentcvterm) {
      // Report the term that was asked for, not the (empty) lookup result.
      watchdog("T_fasta_loader", "Cannot find the parent term type: '%type'", array('%type' => $parent_type), WATCHDOG_ERROR);
      return 0;
    }
  }
  $relcvterm = NULL;
  if ($rel_type) {
    $relcvterm = db_fetch_object(chado_query($cvtermsql, 'relationship', $rel_type, $rel_type));
    if (!$relcvterm) {
      watchdog("T_fasta_loader", "Cannot find the relationship term type: '%type'", array('%type' => $rel_type), WATCHDOG_ERROR);
      return 0;
    }
  }

  print "Opening FASTA file $dfile\n";
  $fh = fopen($dfile, 'r');
  if (!$fh) {
    watchdog('T_fasta_loader', "cannot open file: %dfile", array('%dfile' => $dfile), WATCHDOG_ERROR);
    return 0;
  }
  $filesize = filesize($dfile);

  // Progress is reported roughly every 1% of the file.
  $interval = intval($filesize * 0.01);
  if ($interval < 1) {
    $interval = 1;
  }

  $i = 0;          // current line number
  $num_read = 0;   // bytes read so far
  $intv_read = 0;  // bytes read since the last progress report

  // State of the record that is currently being assembled.
  $name = '';
  $uname = '';
  $residues = '';
  $accession = '';
  $subject = '';
  // No source is derived from the file; the handler still takes the argument.
  $source = NULL;

  // Used when no regular expression is supplied: the first word of the
  // definition line, ending at the first whitespace or '|'.
  $first_word_re = '^\s*(.*?)[\s\|].*$';

  // we need to get the table schema to make sure we don't overrun the
  // size of fields with what our regular expressions retrieve
  $feature_tbl = tripal_core_get_chado_table_schema('feature');
  $dbxref_tbl = tripal_core_get_chado_table_schema('dbxref');

  // Compare against FALSE explicitly so that a line which evaluates to false
  // (e.g. a final "0" without a newline) does not end the loop early.
  while (($line = fgets($fh)) !== FALSE) {
    $i++;
    // Count bytes, not characters, because the total comes from filesize().
    $line_bytes = strlen($line);
    $num_read += $line_bytes;
    $intv_read += $line_bytes;

    if (preg_match('/^>/', $line)) {
      // A definition line starts a new record, so store the pending one.
      if ($name or $uname) {
        tripal_feature_fasta_loader_handle_feature($name, $uname, $db_id,
          $accession, $subject, $rel_type, $parent_type, $analysis_id, $organism_id, $cvterm,
          $source, $residues, $method, $re_name, $match_type, $parentcvterm, $relcvterm);
      }
      // Always start from a clean slate, even when the previous record had
      // no usable name. Otherwise its residues, accession or subject would
      // end up attached to this record.
      $name = '';
      $uname = '';
      $residues = '';
      $accession = '';
      $subject = '';

      $line = preg_replace("/^>/", '', $line); // remove the > symbol from the defline

      // get the feature name
      if ($re_name) {
        $found = _tripal_feature_fasta_defline_capture($re_name, $line);
        if ($found === FALSE) {
          watchdog('trp-fasta', "ERROR: Regular expression for the feature name finds nothing. Line %line.", array('%line' => $i), WATCHDOG_ERROR);
        }
        elseif (strlen($found) > $feature_tbl['fields']['name']['length']) {
          watchdog('trp-fasta', "WARNING: Regular expression retrieves a value too long for the feature name. Line %line.", array('%line' => $i), WATCHDOG_WARNING);
        }
        else {
          $name = $found;
        }
      }
      // without a regular expression the first word is used, but only when
      // features are matched by name
      elseif (strcmp($match_type, 'Name') == 0) {
        $found = _tripal_feature_fasta_defline_capture($first_word_re, $line);
        if ($found === FALSE) {
          watchdog('trp-fasta', "ERROR: Cannot find a feature name. Line %line.", array('%line' => $i), WATCHDOG_ERROR);
        }
        elseif (strlen($found) > $feature_tbl['fields']['name']['length']) {
          watchdog('trp-fasta', "WARNING: Regular expression retrieves a feature name too long for the feature name. Line %line.", array('%line' => $i), WATCHDOG_WARNING);
        }
        else {
          $name = $found;
        }
      }

      // get the feature unique name
      if ($re_uname) {
        $found = _tripal_feature_fasta_defline_capture($re_uname, $line);
        if ($found === FALSE) {
          watchdog('trp-fasta', "ERROR: Regular expression for the feature unique name finds nothing. Line %line.", array('%line' => $i), WATCHDOG_ERROR);
        }
        else {
          $uname = $found;
        }
      }
      // without a regular expression the first word is used, but only when
      // features are matched by unique name
      elseif (strcmp($match_type, 'Unique name') == 0) {
        $found = _tripal_feature_fasta_defline_capture($first_word_re, $line);
        if ($found === FALSE) {
          watchdog('trp-fasta', "ERROR: Cannot find a feature unique name. Line %line.", array('%line' => $i), WATCHDOG_ERROR);
        }
        else {
          $uname = $found;
        }
      }

      // get the accession, but only when an expression was supplied
      if ($re_accession) {
        $found = _tripal_feature_fasta_defline_capture($re_accession, $line);
        if ($found !== FALSE) {
          if (strlen($found) > $dbxref_tbl['fields']['accession']['length']) {
            watchdog('trp-fasta', "WARNING: Regular expression retrieves an accession too long for the feature name. Cannot add cross reference. Line %line.", array('%line' => $i), WATCHDOG_WARNING);
          }
          else {
            $accession = $found;
          }
        }
      }

      // get the relationship subject, but only when an expression was supplied
      if ($re_subject) {
        $found = _tripal_feature_fasta_defline_capture($re_subject, $line);
        if ($found !== FALSE) {
          $subject = $found;
        }
      }
    }
    else {
      $residues .= trim($line);

      // update the job status each time another interval has been read
      if ($job and $intv_read >= $interval) {
        $intv_read = 0;
        $percent = sprintf("%.2f", ($num_read / $filesize) * 100);
        $current = $name ? $name : $uname;
        print "Parsing Line $i (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes. Current feature: $current\r";
        tripal_job_set_progress($job, intval(($num_read / $filesize) * 100));
      }
    }
  }
  fclose($fh);

  // now load the last sequence in the file, if there is one
  if ($name or $uname) {
    tripal_feature_fasta_loader_handle_feature($name, $uname, $db_id,
      $accession, $subject, $rel_type, $parent_type, $analysis_id, $organism_id, $cvterm,
      $source, $residues, $method, $re_name, $match_type, $parentcvterm, $relcvterm);
  }

  // commit the transaction
  tripal_db_commit_transaction();
  print "\nDone\n";
}
- /**
- *
- *
- * @ingroup fasta_loader
- */
- function tripal_feature_fasta_loader_handle_feature($name, $uname, $db_id, $accession,
- $parent, $rel_type, $parent_type, $analysis_id, $organism_id, $cvterm,
- $source, $residues, $method, $re_name, $match_type, $parentcvterm, $relcvterm) {
- // check to see if this feature already exists if the match_type is 'Name'
- if (strcmp($match_type, 'Name')==0) {
- $values = array(
- 'organism_id' => $organism_id,
- 'name' => $name,
- 'type_id' => $cvterm->cvterm_id,
- );
- $options = array('statement_name' => 'sel_feature_ornaty');
- $results = tripal_core_chado_select('feature', array('feature_id'), $values, $options);
- if (count($results) > 1) {
- watchdog('T_fasta_loader', "Multiple features exist with the name '%name' of type
- '%type' for the organism. skipping", array('%name' => $name, '%type' => $type));
- return 0;
- }
- if (count($results) == 1) {
- $feature = $results[0];
- }
- }
- // check to see if this feature already exists if the match_type is 'Unique Name'
- if (strcmp($match_type, 'Unique name')==0) {
- $values = array(
- 'organism_id' => $organism_id,
- 'uniquename' => $uname,
- 'type_id' => $cvterm->cvterm_id,
- );
- $options = array('statement_name' => 'sel_feature_oruqty');
- $results = tripal_core_chado_select('feature', array('feature_id'), $values, $options);
- if (count($results) > 1) {
- watchdog('T_fasta_loader', "Multiple features exist with the name '%name' of type
- '%type' for the organism. skipping", array('%name' => $name, '%type' => $type));
- return 0;
- }
- if (count($results) == 1) {
- $feature = $results[0];
- }
-
- // if the feature exists but this is an "insert only" method then skip this feature
- if ($feature and (strcmp($method, 'Insert only')==0)) {
- watchdog('T_fasta_loader', "Feature already exists '%name' ('%uname') while matching on %type. Skipping insert.",
- array('%name' => $name, '%uname' => $uname, '%type' => drupal_strtolower($match_type)), WATCHDOG_WARNING);
- return 0;
- }
- }
- // if we don't have a feature and we're doing an insert then do the insert
- $inserted = 0;
- if (!$feature and (strcmp($method, 'Insert only')==0 or strcmp($method, 'Insert and update')==0)) {
- // if we have a unique name but not a name then set them to be the same and vice versa
- if (!$uname) {
- $uname = $name;
- }
- elseif (!$name) {
- $name = $uname;
- }
-
- // insert the feature
- $values = array(
- 'organism_id' => $organism_id,
- 'name' => $name,
- 'uniquename' => $uname,
- 'residues' => $residues,
- 'seqlen' => drupal_strlen($residues),
- 'md5checksum' => md5($residues),
- 'type_id' => $cvterm->cvterm_id,
- 'is_analysis' => 'FALSE',
- 'is_obsolete' => 'FALSE',
- );
- $options = array('statement_name' => 'ins_feature_all');
- $success = tripal_core_chado_insert('feature', $values, $options);
- if (!$success) {
- watchdog('T_fasta_loader', "Failed to insert feature '%name (%uname)'",
- array('%name' => $name, '%uname' => $numane), WATCHDOG_ERROR);
- return 0;
- }
-
- // now get the feature we just inserted
- $values = array(
- 'organism_id' => $organism_id,
- 'uniquename' => $uname,
- 'type_id' => $cvterm->cvterm_id,
- );
- $options = array('statement_name' => 'sel_feature_oruqty');
- $results = tripal_core_chado_select('feature', array('feature_id'), $values, $options);
- if (count($results) == 1) {
- $inserted = 1;
- $feature = $results[0];
- }
- else {
- watchdog('T_fasta_loader', "Failed to retreive newly inserted feature '%name (%uname)'",
- array('%name' => $name, '%uname' => $numane), WATCHDOG_ERROR);
- return 0;
- }
- }
-
- // if we don't have a feature and the user wants to do an update then fail
- if (!$feature and (strcmp($method, 'Update only')==0 or drupal_strcmp($method, 'Insert and update')==0)) {
- watchdog('T_fasta_loader', "Failed to find feature '%name' ('%uname') while matching on " .
- drupal_strtolower($match_type), array('%name' => $name, '%uname' => $uname), WATCHDOG_ERROR);
- return 0;
- }
-
- // if we do have a feature and this is an update then proceed with the update
- if ($feature and !$inserted and (strcmp($method, 'Update only')==0 or strcmp($method, 'Insert and update')==0)) {
- // if the user wants to match on the Name field
- if (strcmp($match_type, 'Name')==0) {
- // if we're matching on the name but do not have a unique name then we don't want to update the uniquename.
- $values = array();
- if ($uname) {
- // first check to make sure that by changing the unique name of this feature that we won't conflict with
- // another existing feature of the same name
- $values = array(
- 'organism_id' => $organism_id,
- 'uniquename' => $uname,
- 'type_id' => $cvterm->cvterm_id,
- );
- $options = array('statement_name' => 'sel_feature_oruqty');
- $results = tripal_core_chado_select('feature', array('feature_id'), $values, $options);
- if (count($results) > 0) {
- watchdog('T_fasta_loader', "Cannot update the feature '%name' with a uniquename of '%uname' and type of '%type' as it
- conflicts with an existing feature with the same uniquename and type.",
- array('%name' => $name, '%uname' => $uname, '%type' => $type));
- return 0;
- }
-
- // the changes to the uniquename don't conflict so proceed with the update
- $values = array(
- 'uniquename' => $uname,
- 'residues' => $residues,
- 'seqlen' => drupal_strlen($residues),
- 'md5checksum' => md5($residues),
- 'is_analysis' => 'false',
- 'is_obsolete' => 'false',
- );
- $match = array(
- 'name' => $name,
- 'organism_id' => $organism_id,
- 'type_id' => $cvterm->cvterm_id,
- );
- $options = array('statement_name' => 'upd_feature_resemdisis_naorty_un');
- }
- // if we do not have a new unique name then don't change the existing uniquename field
- else {
- $values = array(
- 'residues' => $residues,
- 'seqlen' => drupal_strlen($residues),
- 'md5checksum' => md5($residues),
- 'is_analysis' => 'false',
- 'is_obsolete' => 'false',
- );
- $match = array(
- 'name' => $name,
- 'organism_id' => $organism_id,
- 'type_id' => $cvterm->cvterm_id,
- );
- $options = array('statement_name' => 'upd_feature_unresemdisis_naorty');
- }
-
- // perform the update
- $success = tripal_core_chado_update('feature', $match, $values, $options);
- if (!$success) {
- watchdog('T_fasta_loader', "Failed to update feature '%name' ('%name')",
- array('%name' => $name, '%uiname' => $uname), WATCHDOG_ERROR);
- return 0;
- }
- }
- if (strcmp($match_type, 'Unique name')==0) {
- // if we're matching on the uniquename but do not have a new name then we don't want to update the name.
- $values = array();
- if ($name) {
- $values = array(
- 'name' => $name,
- 'residues' => $residues,
- 'seqlen' => drupal_strlen($residues),
- 'md5checksum' => md5($residues),
- 'is_analysis' => 'false',
- 'is_obsolete' => 'false',
- );
- $match = array(
- 'uniquename' => $uname,
- 'organism_id' => $organism_id,
- 'type_id' => $cvterm->cvterm_id,
- );
- $options = array('statement_name' => 'upd_feature_resemdisis_unorty_na');
- }
- // if we have a unique name then update it after matching by the name
- else {
- $values = array(
- 'residues' => $residues,
- 'seqlen' => drupal_strlen($residues),
- 'md5checksum' => md5($residues),
- 'is_analysis' => 'false',
- 'is_obsolete' => 'false',
- );
- $match = array(
- 'uniquename' => $uname,
- 'organism_id' => $organism_id,
- 'type_id' => $cvterm->cvterm_id,
- );
- $options = array('statement_name' => 'upd_feature_naresemdisis_unorty');
- }
- $success = tripal_core_chado_update('feature', $match, $values, $options);
- if (!$success) {
- watchdog('T_fasta_loader', "Failed to update feature '%name' ('%name')",
- array('%name' => $name, '%uiname' => $uname), WATCHDOG_ERROR);
- return 0;
- }
- }
- }
-
- // add in the analysis link
- if ($analysis_id) {
- // if the association doesn't already exist then add one
- $values = array(
- 'analysis_id' => $analysis_id,
- 'feature_id' => $feature->feature_id,
- );
- $sel_options = array('statement_name' => 'sel_analysisfeature_anfe');
- $results = tripal_core_chado_select('analysisfeature', array('analysisfeature_id'), $values, $sel_options);
- if (count($results) == 0) {
- $ins_options = array('statement_name' => 'ins_analysisfeature_anfe');
- $success = tripal_core_chado_insert('analysisfeature', $values, $ins_options);
- if (!$success) {
- watchdog('T_fasta_loader', "Failed to associate analysis and feature '%name' ('%name')",
- array('%name' => $name, '%uname' => $uname), WATCHDOG_ERROR);
- return 0;
- }
- }
- }
- // now add the database cross reference
- if ($db_id) {
- // check to see if this accession reference exists, if not add it
- $values = array(
- 'db_id' => $db_id,
- 'accession' => $accession
- );
- $sel_options = array('statement_name' => 'sel_dbxref_dbac');
- $results = tripal_core_chado_select('dbxref', array('dbxref_id'), $values, $sel_options);
- // if the accession doesn't exist then add it
- if (count($results) == 0) {
- $ins_options = array('statement_name' => 'ins_dbxref_dbac');
- $results = tripal_core_chado_insert('dbxref', $values, $ins_options);
- if (!$results) {
- watchdog('T_fasta_loader', "Failed to add database accession '%accession'",
- array('%accession' => $accession), WATCHDOG_ERROR);
- return 0;
- }
- $results = tripal_core_chado_select('dbxref', array('dbxref_id'), $values, $sel_options);
- if (count($results) == 1) {
- $dbxref = $results[0];
- }
- else {
- watchdog('T_fasta_loader', "Failed to retreive newly inserted dbxref '%name (%uname)'",
- array('%name' => $name, '%uname' => $numane), WATCHDOG_ERROR);
- return 0;
- }
- }
- else {
- $dbxref = $results[0];
- }
- // check to see if the feature dbxref record exists if not, then add it
- $values = array(
- 'feature_id' => $feature->feature_id,
- 'dbxref_id' => $dbxref->dbxref_id
- );
- $sel_options = array('statement_name' => 'sel_featuredbxref_fedb');
- $results = tripal_core_chado_select('feature_dbxref', array('feature_dbxref_id'), $values, $sel_options);
- if (count($results) == 0) {
- $ins_options = array('statement_name' => 'ins_featuredbxref_fedb');
- $success = tripal_core_chado_insert('feature_dbxref', $values, $ins_options);
- if (!$success) {
- watchdog('T_fasta_loader', "Failed to add associate database accession '%accession' with feature",
- array('%accession' => $accession), WATCHDOG_ERROR);
- return 0;
- }
- }
- }
- // now add the relationship to the parent feature if it does not already exist
- if ($rel_type) {
- $values = array(
- 'organism_id' => $organism_id,
- 'uniquename' => $parent,
- 'type_id' => $parentcvterm->cvterm_id,
- );
- $options = array('statement_name' => 'sel_feature_oruqty');
- $results = tripal_core_chado_select('feature', array('feature_id'), $values, $options);
- if (count($results) != 1) {
- watchdog('T_fasta_loader', "Cannot find a unique fature for the parent '%parent' of type
- '%type' for the feature.", array('%parent' => $parent, '%type' => $parent_type));
- return 0;
- }
- $parent_feature = $results[0];
-
- // check to see if the relationship already exists if not then add it
- $values = array(
- 'subject_id' => $feature->feature_id,
- 'object_id' => $parent_feature->feature_id,
- 'type_id' => $relcvterm->cvterm_id,
- );
- $sel_options = array('statement_name' => 'sel_featurerelationship_suojty');
- $results = tripal_core_chado_select('feature_relationship', array('feature_relationship_id'), $values, $sel_options);
- if (count($results) == 0) {
- $ins_options = array('statement_name' => 'ins_featurerelationship_suojty');
- $success = tripal_core_chado_insert('feature_relationship', $values, $ins_options);
- if (!$success) {
- watchdog('T_fasta_loader', "Failed to add associate database accession '%accession' with feature",
- array('%accession' => $accession), WATCHDOG_ERROR);
- return 0;
- }
- }
- }
- }
|