<?php

/**
 * @file
 * Form callbacks and job callback for loading features into Chado from a
 * FASTA file.
 */

/**
 * @defgroup fasta_loader FASTA Feature Loader
 * @{
 * Provides fasta loading functionality. Creates features based on their specification in a fasta file.
 * @}
 * @ingroup tripal_feature
 */
 
/**
 * Form builder for the FASTA import form.
 *
 * Collects the FASTA file path, the organism, the Sequence Ontology type of
 * the sequences, the insert/update method, the name matching mode, the
 * analysis, and the optional "advanced" settings (regular expressions for
 * the name, unique name, accession and parent, the external database and the
 * relationship type). The organism, analysis and database select lists are
 * populated from Chado.
 *
 * The radio options of 'method' and 'match_type' are keyed 0..n; the validate
 * and submit handlers translate those indexes back into labels.
 *
 * No library selector is offered, so the form has no 'library_id' element.
 *
 * @return
 *   The form array.
 *
 * @ingroup fasta_loader
 */
function tripal_feature_fasta_load_form() {
  $form = array();

  $form['fasta_file'] = array(
    '#type'          => 'textfield',
    '#title'         => t('FASTA File'),
    '#description'   => t('Please enter the full system path for the FASTA file, or a path within the Drupal
                           installation (e.g. /sites/default/files/xyz.fasta).  The path must be accessible to the
                           server on which this Drupal instance is running.'),
    '#required' => TRUE,
  );

  // get the list of organisms
  $organisms = array('' => '');
  $rows = _tripal_feature_fasta_load_form_chado_rows("SELECT * FROM {organism} ORDER BY genus, species");
  foreach ($rows as $organism) {
    $organisms[$organism->organism_id] = "$organism->genus $organism->species ($organism->common_name)";
  }
  $form['organism_id'] = array(
    '#title'       => t('Organism'),
    // '#type' is a machine name and must never be passed through t().
    '#type'        => 'select',
    '#description' => t("Choose the organism to which these sequences are associated"),
    '#required'    => TRUE,
    '#options'     => $organisms,
  );

  $form['seqtype'] = array(
    '#type' => 'textfield',
    '#title' => t('Sequence Type'),
    '#required' => TRUE,
    '#description' => t('Please enter the Sequence Ontology term that describes the sequences in the FASTA file.'),
  );

  $form['method'] = array(
    '#type' => 'radios',
    '#title' => t('Method'),
    '#required' => TRUE,
    '#options' => array(
      0 => t('Insert only'),
      1 => t('Update only'),
      2 => t('Insert and update'),
    ),
    '#description' => t('Select how features in the FASTA file are handled.
       Select "Insert only" to insert the new features. If a feature already
       exists with the same name or unique name and type then it is skipped.
       Select "Update only" to only update featues that already exist in the
       database.  Select "Insert and Update" to insert features that do
       not exist and upate those that do.'),
    '#default_value' => 2,
  );

  $form['match_type'] = array(
    '#type' => 'radios',
    '#title' => t('Name Match Type'),
    '#required' => TRUE,
    '#options' => array(
      0 => t('Name'),
      1 => t('Unique name'),
    ),
    '#description' => t('Feature data is stored in Chado with both a human-readable
      name and a unique name. If the features in your FASTA file are identified using
      a human-readable name then select the "Name" button. If your features are
      identified using the unique name then select the "Unique name" button.  If you
      loaded your features first using the GFF loader then the unique name of each
      features were indicated by the "ID=" attribute and the name by the "Name=" attribute.
      By default, the FASTA loader will use the first word (character string
      before the first space) as  the name for your feature. If
      this does not uniquely identify your feature consider specifying a regular expression in the advanced section below.
      Additionally, you may import both a name and a unique name for each sequence using the advanced options.
      When updating a sequence, the value selected here will be used to identify the sequence in the
      database in combination with any regular expression provided below.'),
    '#default_value' => 1,
  );

  $form['analysis'] = array(
    '#type' => 'fieldset',
    '#title' => t('Analysis Used to Derive Features'),
    '#collapsed' => TRUE
  );
  $form['analysis']['desc'] = array(
    '#type' => 'markup',
    '#value' => t("Why specify an analysis for a data load?  All data comes
       from some place, even if downloaded from Genbank. By specifying
       analysis details for all data uploads, it allows an end user to reproduce the
       data set, but at least indicates the source of the data."),
  );

  // get the list of analyses
  $analyses = array('' => '');
  $rows = _tripal_feature_fasta_load_form_chado_rows("SELECT * FROM {analysis} ORDER BY name");
  foreach ($rows as $analysis) {
    $analyses[$analysis->analysis_id] = "$analysis->name ($analysis->program $analysis->programversion, $analysis->sourcename)";
  }
  $form['analysis']['analysis_id'] = array(
    '#title'       => t('Analysis'),
    '#type'        => 'select',
    '#description' => t("Choose the analysis to which these features are associated"),
    '#required'    => TRUE,
    '#options'     => $analyses,
  );

  // Advanced Options
  $form['advanced'] = array(
    '#type' => 'fieldset',
    '#title' => t('Advanced Options'),
    '#collapsed' => TRUE
  );
  $form['advanced']['re_help'] = array(
    '#type' => 'item',
    '#value' => t('A regular expression is an advanced method for extracting information from a string of text.
                   Your FASTA file may contain both a human-readable name and a unique name for each sequence.
                   If you want to import
                   both the name and unique name for all sequences, then you must provide regular expressions
                   so that the loader knows how to separate them.
                   Otherwise the name and uniquename will be the same.
                   By default, this loader will use the first word in the definition
                   lines of the FASTA file
                   as the name or unique name of the feature.'),
  );
  $form['advanced']['re_name'] = array(
    '#type' => 'textfield',
    '#title' => t('Regular expression for the name'),
    '#required' => FALSE,
    '#description' => t('Enter the regular expression that will extract the
       feature name from the FASTA definition line. For example, for a
       defintion line with a name and unique name separated by a bar \'|\' (>seqname|uniquename),
       the regular expression for the name would be, "^(.*?)\|.*$".'),
  );
  $form['advanced']['re_uname'] = array(
    '#type' => 'textfield',
    '#title' => t('Regular expression for the unique name'),
    '#required' => FALSE,
    '#description' => t('Enter the regular expression that will extract the
       feature unique name from the FASTA definition line. For example, for a
       defintion line with a name and unique name separated by a bar \'|\' (>seqname|uniquename),
       the regular expression for the unique name would be "^.*?\|(.*)$".'),
  );

  // Advanced database cross-reference options
  $form['advanced']['db'] = array(
    '#type' => 'fieldset',
    '#title' => t('External Database Reference'),
    '#weight' => 6,
    '#collapsed' => TRUE
  );
  $form['advanced']['db']['re_accession'] = array(
    '#type' => 'textfield',
    '#title' => t('Regular expression for the accession'),
    '#required' => FALSE,
    '#description' => t('Enter the regular expression that will extract the accession for the external database for each feature from the FASTA definition line.'),
    '#weight' => 2
  );

  // get the list of databases
  $dbs = array('' => '');
  $rows = _tripal_feature_fasta_load_form_chado_rows("SELECT * FROM {db} ORDER BY name");
  foreach ($rows as $db) {
    $dbs[$db->db_id] = "$db->name";
  }
  $form['advanced']['db']['db_id'] = array(
    '#title'       => t('External Database'),
    '#type'        => 'select',
    '#description' => t("Plese choose an external database for which these sequences have a cross reference."),
    '#required'    => FALSE,
    '#options'     => $dbs,
    '#weight'      => 1,
  );

  // Advanced relationship options
  $form['advanced']['relationship'] = array(
    '#type' => 'fieldset',
    '#title' => t('Relationships'),
    '#weight' => 6,
    '#collapsed' => TRUE
  );
  // Keys are the relationship term names handed to the loader; values are
  // the labels shown to the user.
  $rels = array(
    '' => '',
    'part_of' => 'part of',
    'derives_from' => 'produced by',
  );
  $form['advanced']['relationship']['rel_type'] = array(
    '#title'       => t('Relationship Type'),
    '#type'        => 'select',
    '#description' => t("Use this option to create associations, or relationships between the
                        features of this FASTA file and existing features in the database. For
                        example, to associate a FASTA file of peptides to existing genes or transcript sequence,
                        select the type 'produced by'. For a CDS sequences select the type 'part of'"),
    '#required'    => FALSE,
    '#options'     => $rels,
    '#weight'      => 5,
  );
  $form['advanced']['relationship']['re_subject'] = array(
    '#type' => 'textfield',
    '#title' => t('Regular expression for the parent'),
    '#required' => FALSE,
    '#description' => t('Enter the regular expression that will extract the unique
                         name needed to identify the existing sequence for which the
                         relationship type selected above will apply.'),
    '#weight' => 6
  );
  $form['advanced']['relationship']['parent_type'] = array(
    '#type' => 'textfield',
    '#title' => t('Parent Type'),
    '#required' => FALSE,
    '#description' => t('Please enter the Sequence Ontology term for the parent.  For example
                         if the FASTA file being loaded is a set of proteins that are
                         products of genes, then use the SO term \'gene\' or \'transcript\' or equivalent. However,
                         this type must match the type for already loaded features.'),
    '#weight' => 7
  );

  $form['button'] = array(
    '#type' => 'submit',
    '#value' => t('Import FASTA file'),
    '#weight' => 10,
  );

  return $form;
}

/**
 * Runs a parameterless SELECT against Chado and returns all rows.
 *
 * The active database is switched to 'chado' only for the duration of the
 * query and is restored before the rows are fetched.
 *
 * @param $sql
 *   The SELECT statement to run.
 *
 * @return
 *   An array of row objects (empty when the query returns no rows).
 */
function _tripal_feature_fasta_load_form_chado_rows($sql) {
  $previous_db = tripal_db_set_active('chado');  // use chado database
  $rset = db_query($sql);
  tripal_db_set_active($previous_db);  // now use drupal database

  $rows = array();
  while ($row = db_fetch_object($rset)) {
    $rows[] = $row;
  }
  return $rows;
}
 
/**
 * Validation handler for the FASTA import form.
 *
 * Checks that:
 *  - a regular expression exists for whichever of name/unique name is used
 *    for matching when only the other one was supplied,
 *  - the FASTA file exists, either relative to the Drupal installation or as
 *    an absolute path,
 *  - the relationship settings and the external database settings are each
 *    either completely filled in or completely empty,
 *  - the sequence type (and the parent type, when a relationship is
 *    requested) is a term or synonym in the 'sequence' vocabulary.
 *
 * The resolved file path is stored in $form_state['storage']['dfile'] for the
 * submit handler.
 *
 * @param $form
 *   The form array.
 * @param $form_state
 *   The form state; 'storage' is modified.
 *
 * @ingroup fasta_loader
 */
function tripal_feature_fasta_load_form_validate($form, &$form_state) {
  $values = $form_state['values'];

  $fasta_file   = trim($values['fasta_file']);
  $type         = trim($values['seqtype']);
  $match_type   = trim($values['match_type']);
  $re_name      = trim($values['re_name']);
  $re_uname     = trim($values['re_uname']);
  $re_accession = trim($values['re_accession']);
  $db_id        = $values['db_id'];
  $rel_type     = $values['rel_type'];
  $re_subject   = trim($values['re_subject']);
  $parent_type  = trim($values['parent_type']);

  // The radios submit the option index; translate it to the label used by
  // the comparisons below.
  $match_labels = array(0 => 'Name', 1 => 'Unique name');
  if (isset($match_labels[$match_type])) {
    $match_type = $match_labels[$match_type];
  }

  if ($re_name and !$re_uname and strcmp($match_type, 'Unique name') == 0) {
    form_set_error('re_uname', t("You must provide a regular expression to identify the sequence unique name"));
  }
  if (!$re_name and $re_uname and strcmp($match_type, 'Name') == 0) {
    form_set_error('re_name', t("You must provide a regular expression to identify the sequence name"));
  }

  // check to see if the file is located local to Drupal
  $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $fasta_file;
  if (!file_exists($dfile)) {
    // if not local to Drupal, the file must be someplace else, just use
    // the full path provided
    $dfile = $fasta_file;
  }
  if (!file_exists($dfile)) {
    form_set_error('fasta_file', t("Cannot find the file on the system. Check that the file exists or that the web server has permissions to read the file."));
  }

  // make sure if a relationship is specified that all fields are provided.
  if (($rel_type or $parent_type) and !$re_subject) {
    form_set_error('re_subject', t("Please provide a regular expression for the parent"));
  }
  if (($rel_type or $re_subject) and !$parent_type) {
    form_set_error('parent_type', t("Please provide a SO term for the parent"));
  }
  if (($parent_type or $re_subject) and !$rel_type) {
    form_set_error('rel_type', t("Please select a relationship type"));
  }

  // make sure if a database is specified that all fields are provided
  if ($db_id and !$re_accession) {
    form_set_error('re_accession', t("Please provide a regular expression for the accession"));
  }
  if ($re_accession and !$db_id) {
    form_set_error('db_id', t("Please select a database"));
  }

  // check to make sure the types exist.  The cvterm tables live in Chado, so
  // the lookups are run with the chado database active, as everywhere else in
  // this file.
  $cvtermsql = "SELECT CVT.cvterm_id
               FROM {cvterm} CVT
                  INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
                  LEFT JOIN {cvtermsynonym} CVTS on CVTS.cvterm_id = CVT.cvterm_id
               WHERE cv.name = '%s' and (CVT.name = '%s' or CVTS.synonym = '%s')";
  $previous_db = tripal_db_set_active('chado');
  $type_term = db_fetch_object(db_query($cvtermsql, 'sequence', $type, $type));
  $parent_term = TRUE;
  if ($rel_type and $parent_type) {
    // An empty parent type has already been reported above.
    $parent_term = db_fetch_object(db_query($cvtermsql, 'sequence', $parent_type, $parent_type));
  }
  tripal_db_set_active($previous_db);

  if (!$type_term) {
    // The form element is named 'seqtype'.
    form_set_error('seqtype', t("The Sequence Ontology (SO) term selected for the sequence type is not available in the database. Please check spelling or select another."));
  }
  if (!$parent_term) {
    form_set_error('parent_type', t("The Sequence Ontology (SO) term selected for the parent relationship is not available in the database. Please check spelling or select another."));
  }

  // @todo check to make sure the 'relationship' and 'sequence' ontologies are loaded

  $form_state['storage']['dfile'] = $dfile;
}
 
/**
 * Submit handler for the FASTA import form.
 *
 * Translates the 'method' and 'match_type' radio indexes into their labels
 * and queues a Tripal job that calls tripal_feature_load_fasta() with the
 * submitted settings. The file path is the one resolved by the validate
 * handler and stored in $form_state['storage']['dfile'].
 *
 * @param $form
 *   The form array.
 * @param $form_state
 *   The form state.
 *
 * @ingroup fasta_loader
 */
function tripal_feature_fasta_load_form_submit($form, &$form_state) {
  global $user;

  $values = $form_state['values'];

  $dfile        = $form_state['storage']['dfile'];
  $organism_id  = $values['organism_id'];
  $type         = trim($values['seqtype']);
  $method       = trim($values['method']);
  $match_type   = trim($values['match_type']);
  // The form does not define a 'library_id' element, so the value is normally
  // absent; NULL is passed on in that case.
  $library_id   = isset($values['library_id']) ? $values['library_id'] : NULL;
  $re_name      = trim($values['re_name']);
  $re_uname     = trim($values['re_uname']);
  $re_accession = trim($values['re_accession']);
  $db_id        = $values['db_id'];
  $rel_type     = $values['rel_type'];
  $re_subject   = trim($values['re_subject']);
  $parent_type  = trim($values['parent_type']);
  $analysis_id  = $values['analysis_id'];

  // The radios submit the option index; the loader expects the label.
  $method_labels = array(0 => 'Insert only', 1 => 'Update only', 2 => 'Insert and update');
  if (isset($method_labels[$method])) {
    $method = $method_labels[$method];
  }
  $match_labels = array(0 => 'Name', 1 => 'Unique name');
  if (isset($match_labels[$match_type])) {
    $match_type = $match_labels[$match_type];
  }

  // The order must match the parameter list of tripal_feature_load_fasta().
  $args = array($dfile, $organism_id, $type, $library_id, $re_name, $re_uname,
          $re_accession, $db_id, $rel_type, $re_subject, $parent_type, $method,
          $user->uid, $analysis_id, $match_type);

  tripal_add_job("Import FASTA file: $dfile", 'tripal_feature',
    'tripal_feature_load_fasta', $args, $user->uid);
}
 
/**
 * Job callback: reads a FASTA file and loads each sequence as a feature.
 *
 * The file is read line by line. Every definition line (starting with '>')
 * starts a new record; the name, unique name, accession and relationship
 * parent are extracted from it, and the lines that follow are concatenated
 * into the residues. Each completed record is handed to
 * tripal_feature_fasta_loader_handle_feature().
 *
 * @param $dfile
 *   Path of the FASTA file.
 * @param $organism_id
 *   The organism_id the features belong to.
 * @param $type
 *   Sequence Ontology term for the features.
 * @param $library_id
 *   Not used by this function.
 * @param $re_name
 *   Regular expression (without delimiters) whose first capture group is the
 *   feature name, or empty.
 * @param $re_uname
 *   Same as $re_name, for the unique name.
 * @param $re_accession
 *   Same as $re_name, for the external database accession.
 * @param $db_id
 *   The db_id of the external database, or empty.
 * @param $rel_type
 *   Relationship term name, or empty.
 * @param $re_subject
 *   Same as $re_name, for the parent of the relationship.
 * @param $parent_type
 *   Sequence Ontology term of the parent.
 * @param $method
 *   'Insert only', 'Update only' or 'Insert and update'.
 * @param $uid
 *   Not used by this function.
 * @param $analysis_id
 *   The analysis_id to link the features to.
 * @param $match_type
 *   'Name' or 'Unique name'.
 * @param $job
 *   Optional job identifier; when given, progress is reported through
 *   tripal_job_set_progress().
 *
 * @return
 *   Always an empty string.
 *
 * @ingroup fasta_loader
 */
function tripal_feature_load_fasta($dfile, $organism_id, $type,
  $library_id, $re_name, $re_uname, $re_accession, $db_id, $rel_type,
  $re_subject, $parent_type, $method, $uid, $analysis_id,
  $match_type, $job = NULL) {

  print "Opening FASTA file $dfile\n";

  // Stream the file instead of loading it completely into memory.
  $fh = fopen($dfile, 'r');
  if (!$fh) {
    print "ERROR: cannot open the FASTA file: $dfile\n";
    return '';
  }
  $total_bytes  = filesize($dfile);
  $bytes_read   = 0;
  $last_percent = 0;

  $name      = '';
  $uname     = '';
  $accession = '';
  $subject   = '';
  $residues  = '';
  // The handler takes a $source argument but this loader has no value for it.
  $source    = NULL;

  while (($line = fgets($fh)) !== FALSE) {
    $bytes_read += strlen($line);

    // update the job status every time another 1% of the file has been read
    if ($job and $total_bytes > 0) {
      $percent = intval(($bytes_read / $total_bytes) * 100);
      if ($percent > $last_percent) {
        tripal_job_set_progress($job, $percent);
        $last_percent = $percent;
      }
    }

    // anything that is not a definition line is sequence data
    if (strncmp($line, '>', 1) != 0) {
      $residues .= trim($line);
      continue;
    }

    // we are starting a new sequence so handle the previous one first
    if ($name or $uname) {
      tripal_feature_fasta_loader_handle_feature($name, $uname, $db_id,
        $accession, $subject, $rel_type, $parent_type, $analysis_id, $organism_id, $type,
        $source, $residues, $method, $re_name, $match_type);
    }
    // Always start the new record empty so that nothing from an unnamed
    // record leaks into this one.
    $residues  = '';
    $name      = '';
    $uname     = '';
    $accession = '';
    $subject   = '';

    $defline = substr($line, 1);

    // get the feature name
    if ($re_name) {
      $found = _tripal_feature_load_fasta_extract($re_name, $defline);
      if ($found === FALSE) {
        print "WARNING: Regular expression for the feature name finds nothing\n";
      }
      else {
        $name = $found;
      }
    }
    elseif (strcmp($match_type, 'Name') == 0) {
      // no regular expression: use the first word as the name
      $name = _tripal_feature_load_fasta_first_word($defline);
    }

    // get the feature unique name
    if ($re_uname) {
      $found = _tripal_feature_load_fasta_extract($re_uname, $defline);
      if ($found === FALSE) {
        print "WARNING: Regular expression for the feature unique name finds nothing\n";
      }
      else {
        $uname = $found;
      }
    }
    elseif (strcmp($match_type, 'Unique name') == 0) {
      // no regular expression: use the first word as the unique name
      $uname = _tripal_feature_load_fasta_first_word($defline);
    }

    // get the accession
    if ($re_accession) {
      $found = _tripal_feature_load_fasta_extract($re_accession, $defline);
      $accession = ($found === FALSE) ? '' : $found;
    }

    // get the relationship subject
    if ($re_subject) {
      $found = _tripal_feature_load_fasta_extract($re_subject, $defline);
      $subject = ($found === FALSE) ? '' : $found;
    }
  }
  fclose($fh);

  // now load the last sequence in the file, if there is one
  if ($name or $uname) {
    tripal_feature_fasta_loader_handle_feature($name, $uname, $db_id,
      $accession, $subject, $rel_type, $parent_type, $analysis_id, $organism_id, $type,
      $source, $residues, $method, $re_name, $match_type);
  }

  return '';
}

/**
 * Returns the trimmed first capture group of a regular expression.
 *
 * @param $regex
 *   The pattern without delimiters; '/' is used as the delimiter.
 * @param $defline
 *   The definition line without the leading '>'.
 *
 * @return
 *   The trimmed text of capture group 1, or FALSE when the pattern does not
 *   match or has no first capture group.
 */
function _tripal_feature_load_fasta_extract($regex, $defline) {
  $matches = array();
  if (preg_match("/$regex/", $defline, $matches) and isset($matches[1])) {
    return trim($matches[1]);
  }
  return FALSE;
}

/**
 * Returns the first word of a definition line.
 *
 * The word ends at the first whitespace character or bar ('|'). A line that
 * ends without trailing whitespace is handled as well.
 *
 * @param $defline
 *   The definition line without the leading '>'.
 *
 * @return
 *   The first word, or an empty string when there is none.
 */
function _tripal_feature_load_fasta_first_word($defline) {
  $matches = array();
  if (preg_match('/^\s*([^\s\|]+)/', $defline, $matches)) {
    return $matches[1];
  }
  return '';
}
 
/**
 * Inserts or updates a single feature read from a FASTA file.
 *
 * Switches to the chado database, processes the feature and always switches
 * back to the previously active database, including when processing stops
 * early.
 *
 * @param $name
 *   The feature name (may be empty when matching on the unique name).
 * @param $uname
 *   The feature unique name (may be empty when matching on the name).
 * @param $db_id
 *   The db_id of the external database for the cross reference, or empty.
 * @param $accession
 *   The accession in the external database.
 * @param $parent
 *   The unique name of the parent feature for the relationship.
 * @param $rel_type
 *   The relationship term name, or empty for no relationship.
 * @param $parent_type
 *   Sequence Ontology term of the parent feature.
 * @param $analysis_id
 *   The analysis_id to link the feature to, or empty.
 * @param $organism_id
 *   The organism_id of the feature.
 * @param $type
 *   Sequence Ontology term of the feature.
 * @param $source
 *   Not used.
 * @param $residues
 *   The sequence.
 * @param $method
 *   'Insert only', 'Update only' or 'Insert and update'.
 * @param $re_name
 *   Not used.
 * @param $match_type
 *   'Name' or 'Unique name': the column used to find an existing feature.
 *
 * @return
 *   0 when the feature was skipped or an error occurred; nothing otherwise.
 *
 * @ingroup fasta_loader
 */
function tripal_feature_fasta_loader_handle_feature($name, $uname, $db_id, $accession,
  $parent, $rel_type, $parent_type, $analysis_id, $organism_id, $type,
  $source, $residues, $method, $re_name, $match_type) {

  $previous_db = tripal_db_set_active('chado');
  $status = _tripal_feature_fasta_loader_process_feature($name, $uname, $db_id,
    $accession, $parent, $rel_type, $parent_type, $analysis_id, $organism_id,
    $type, $residues, $method, $match_type);
  tripal_db_set_active($previous_db);

  return $status;
}

/**
 * Does the work of tripal_feature_fasta_loader_handle_feature().
 *
 * Must be called with the chado database active. Parameters have the same
 * meaning as in tripal_feature_fasta_loader_handle_feature().
 *
 * @return
 *   0 when the feature was skipped or an error occurred; nothing otherwise.
 */
function _tripal_feature_fasta_loader_process_feature($name, $uname, $db_id, $accession,
  $parent, $rel_type, $parent_type, $analysis_id, $organism_id, $type,
  $residues, $method, $match_type) {

  // first get the type for this sequence
  $cvtermsql = "SELECT CVT.cvterm_id
               FROM {cvterm} CVT
                  INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
                  LEFT JOIN {cvtermsynonym} CVTS on CVTS.cvterm_id = CVT.cvterm_id
               WHERE cv.name = '%s' and (CVT.name = '%s' or CVTS.synonym = '%s')";
  $cvterm = db_fetch_object(db_query($cvtermsql, 'sequence', $type, $type));
  if (!$cvterm) {
    print "ERROR: cannot find the term type: '$type'\n";
    return 0;
  }

  $by_name_sql  = "SELECT * FROM {feature}
                   WHERE organism_id = %d and name = '%s' and type_id = %d";
  $by_uname_sql = "SELECT * FROM {feature}
                   WHERE organism_id = %d and uniquename = '%s' and type_id = %d";
  $by_id_sql    = "SELECT * FROM {feature} WHERE feature_id = %d";

  $match_on_name = (strcmp($match_type, 'Name') == 0);
  $may_insert = (strcmp($method, 'Insert only') == 0 or strcmp($method, 'Insert and update') == 0);
  $may_update = (strcmp($method, 'Update only') == 0 or strcmp($method, 'Insert and update') == 0);

  // check to see if this feature already exists
  $feature = NULL;
  if ($match_on_name) {
    $cnt_sql = "SELECT count(*) as cnt FROM {feature}
                  WHERE organism_id = %d and name = '%s' and type_id = %d";
    $cnt = db_fetch_object(db_query($cnt_sql, $organism_id, $name, $cvterm->cvterm_id));
    if ($cnt->cnt > 1) {
      print "ERROR: multiple features exist with the name '$name' of type '$type' for the organism.  skipping\n";
      return 0;
    }
    $feature = db_fetch_object(db_query($by_name_sql, $organism_id, $name, $cvterm->cvterm_id));
  }
  elseif (strcmp($match_type, 'Unique name') == 0) {
    $feature = db_fetch_object(db_query($by_uname_sql, $organism_id, $uname, $cvterm->cvterm_id));
  }

  $seqlen   = drupal_strlen($residues);
  $checksum = md5($residues);

  if (!$feature and $may_insert) {
    // if we have a unique name but not a name then set them to be the same
    // and vice versa
    if (!$uname) {
      $uname = $name;
    }
    elseif (!$name) {
      $name = $uname;
    }

    // now insert the feature
    $sql = "INSERT INTO {feature}
           (organism_id, name, uniquename, residues, seqlen,
            md5checksum,type_id,is_analysis,is_obsolete)
        VALUES(%d,'%s','%s','%s',%d, '%s', %d, %s, %s)";
    $result = db_query($sql, $organism_id, $name, $uname, $residues, $seqlen,
            $checksum, $cvterm->cvterm_id, 'false', 'false');
    if (!$result) {
      print "ERROR: failed to insert feature '$name ($uname)'\n";
      return 0;
    }
    print "Inserted feature $name ($uname)\n";

    // A new feature is always retrieved by its unique name, whatever the
    // match type.
    $feature = db_fetch_object(db_query($by_uname_sql, $organism_id, $uname, $cvterm->cvterm_id));
  }
  elseif (!$feature) {
    // nothing to update
    print "WARNING: failed to find feature '$name' ('$uname') while matching on " . drupal_strtolower($match_type) . ". Skipping\n";
    return 0;
  }
  elseif (!$may_update) {
    // the feature exists and only inserts were requested
    print "WARNING: feature already exists: '$name' ('$uname'). Skipping\n";
    return 0;
  }
  else {
    // The feature was found above, so it is updated through its feature_id.
    if ($match_on_name and $uname) {
      // matching on the name and a unique name was extracted: update it too
      $sql = "UPDATE {feature}
              SET uniquename = '%s', residues = '%s', seqlen = %d, md5checksum = '%s'
              WHERE feature_id = %d";
      $result = db_query($sql, $uname, $residues, $seqlen, $checksum, $feature->feature_id);
    }
    elseif (!$match_on_name and $name) {
      // matching on the unique name and a name was extracted: update it too
      $sql = "UPDATE {feature}
              SET name = '%s', residues = '%s', seqlen = %d, md5checksum = '%s'
              WHERE feature_id = %d";
      $result = db_query($sql, $name, $residues, $seqlen, $checksum, $feature->feature_id);
    }
    else {
      $sql = "UPDATE {feature}
              SET residues = '%s', seqlen = %d, md5checksum = '%s'
              WHERE feature_id = %d";
      $result = db_query($sql, $residues, $seqlen, $checksum, $feature->feature_id);
    }
    if (!$result) {
      print "ERROR: failed to update feature '$name ($uname)'\n";
      return 0;
    }
    print "Updated feature $name ($uname)\n";

    // reload the record so that it reflects the update
    $feature = db_fetch_object(db_query($by_id_sql, $feature->feature_id));
  }

  if (!$feature) {
    print "Something bad has happened: $organism_id, $uname, $cvterm->cvterm_id\n";
    return 0;
  }

  // add in the analysis link
  if ($analysis_id) {
    // @coder-ignore: non-drupal table thus table prefixing doesn't apply
    $analysis_link_sql = 'SELECT * FROM analysisfeature WHERE analysis_id=%d AND feature_id=%d';
    $analysis_link = db_fetch_object(db_query($analysis_link_sql, $analysis_id, $feature->feature_id));
    if (!$analysis_link) {
      // @coder-ignore: non-drupal table thus table prefixing doesn't apply
      $sql = "INSERT INTO analysisfeature (analysis_id, feature_id) VALUES (%d, %d)";
      $result = db_query($sql, $analysis_id, $feature->feature_id);
      if (!$result) {
        print "WARNING: could not add link between analysis: " . $analysis_id . " and feature: " . $feature->uniquename . "\n";
      }
    }
  }

  // now add the database cross reference
  if ($db_id and !$accession) {
    print "WARNING: no accession found for feature '$name' ('$uname'). Skipping the database cross reference\n";
  }
  elseif ($db_id) {
    // check to see if this accession reference exists, if not add it
    // @coder-ignore: non-drupal table thus table prefixing doesn't apply
    $dbxrefsql = "SELECT * FROM dbxref WHERE db_id = %d and accession = '%s'";
    $dbxref = db_fetch_object(db_query($dbxrefsql, $db_id, $accession));
    if (!$dbxref) {
      // @coder-ignore: non-drupal table thus table prefixing doesn't apply
      $sql = "INSERT INTO dbxref (db_id,accession) VALUES (%d, '%s')";
      $result = db_query($sql, $db_id, $accession);
      if (!$result) {
        print "WARNING: could not add external database acession: '$name accession: $accession'\n";
      }
      $dbxref = db_fetch_object(db_query($dbxrefsql, $db_id, $accession));
    }

    // check to see if the feature dbxref record exists if not, then add it
    if ($dbxref) {
      $fdbxrefsql = "SELECT * FROM {feature_dbxref} WHERE feature_id = %d and dbxref_id = %d";
      $fdbxref = db_fetch_object(db_query($fdbxrefsql, $feature->feature_id, $dbxref->dbxref_id));
      if (!$fdbxref) {
        $sql = "INSERT INTO {feature_dbxref} (feature_id,dbxref_id) VALUES (%d, %d)";
        $result = db_query($sql, $feature->feature_id, $dbxref->dbxref_id);
        if (!$result) {
          print "WARNING: could not associate database cross reference with feature: '$name accession: $accession'\n";
        }
        else {
          print "Added database crossreference $name ($uname) -> $accession\n";
        }
      }
    }
  }

  // now add in the relationship if one exists.  First, get the parent type
  // and the relationship type, then get the parent feature
  if ($rel_type) {
    $parentcvterm = db_fetch_object(db_query($cvtermsql, 'sequence', $parent_type, $parent_type));
    $relcvterm = db_fetch_object(db_query($cvtermsql, 'relationship', $rel_type, $rel_type));

    $parent_feature = NULL;
    if ($parentcvterm and $relcvterm) {
      // the parent is identified by its unique name
      $parent_feature = db_fetch_object(db_query($by_uname_sql, $organism_id, $parent, $parentcvterm->cvterm_id));
    }

    if (!$parentcvterm or !$relcvterm) {
      print "WARNING: cannot establish relationship '$uname' ($type) $rel_type '$parent' ($parent_type): Cannot find the relationship or parent term\n";
    }
    elseif (!$parent_feature) {
      print "WARNING: cannot establish relationship '$uname' ($type) $rel_type '$parent' ($parent_type): Cannot find the parent\n";
    }
    else {
      // check to see if the relationship already exists
      $sql = "SELECT * FROM {feature_relationship} WHERE subject_id = %d and object_id = %d and type_id = %d";
      $rel = db_fetch_object(db_query($sql, $feature->feature_id, $parent_feature->feature_id, $relcvterm->cvterm_id));
      if ($rel) {
        print "WARNING: relationship already exists, skipping '$uname' ($type) $rel_type '$parent' ($parent_type)\n";
      }
      else {
        $sql = "INSERT INTO {feature_relationship} (subject_id,object_id,type_id)
                VALUES (%d,%d,%d)";
        $result = db_query($sql, $feature->feature_id, $parent_feature->feature_id, $relcvterm->cvterm_id);
        if (!$result) {
          print "WARNING: failed to insert feature relationship '$uname' ($type) $rel_type '$parent' ($parent_type)\n";
        }
        else {
          print "Inserted relationship relationship: '$uname' ($type) $rel_type '$parent' ($parent_type)\n";
        }
      }
    }
  }
}
 
 
  |