Browse Source

Corrected the feature insert to use uniquename, organism_id and type_id to check for uniqueness. This correction also required naming the Drupal nodes uniquely. Also added support for relationships on the feature page and for showing the sequences of related features.

spficklin 14 years ago
parent
commit
a52a64b19b

+ 285 - 0
tripal_feature/admin.php

@@ -0,0 +1,285 @@
+<?php
+
+/**
+ * Builds the administrative settings form for the tripal_feature module.
+ *
+ * When tripal_get_module_active_jobs() reports jobs for this module, the
+ * settings are replaced by a notice so that no further jobs can be queued
+ * from this page until those jobs finish.
+ *
+ * @return
+ *   The form array as returned by system_settings_form().
+ */
+function tripal_feature_admin () {
+   $form = array();
+
+   // before proceeding check to see if we have any
+   // currently processing jobs. If so, we don't want
+   // to give the opportunity to sync features
+   if(tripal_get_module_active_jobs('tripal_feature')){
+      $form['notice'] = array(
+         '#type' => 'fieldset',
+         '#title' => t('Feature Management Temporarily Unavailable')
+      );
+      $form['notice']['message'] = array(
+         '#value' => t("Currently, feature management jobs are waiting or ".
+            "are running. Management features have been hidden until these ".
+            "jobs complete.  Please check back later once these jobs have ".
+            "finished.  You can view the status of pending jobs in the Tripal ".
+            "jobs page."),
+      );
+      return system_settings_form($form);
+   }
+
+   $form['chado_feature_accession_prefix'] = array (
+      '#title'       => t('Accession Prefix'),
+      // element types are machine names and must not be translated
+      '#type'        => 'textfield',
+      '#description' => t("Accession numbers for features consist of the ".
+         "chado feature_id and a site specific prefix.  Set the prefix that ".
+         "will be incorporated in front of each feature_id to form a unique ".
+         "accession number for this site."),
+      '#required'    => TRUE,
+      '#default_value' => variable_get('chado_feature_accession_prefix','ID'),
+   );
+
+   $form['chado_feature_types'] = array(
+      '#title'       => t('Feature Types'),
+      '#type'        => 'textarea',
+      '#description' => t("Enter the names of the sequence types that the ".
+         "site will support with independent pages.  Pages for these data ".
+         "types will be built automatically for features that exist in the ".
+         "chado database.  The names listed here should be separated by ".
+         "spaces or entered separately on new lines. The names must match ".
+         "exactly (spelling and case) with terms in the sequence ontology"),
+      '#required'    => TRUE,
+      '#default_value' => variable_get('chado_feature_types','EST contig'),
+   );
+
+   $form['browser'] = array(
+      '#type' => 'fieldset',
+      '#title' => t('Feature Browser')
+   );
+   $allowedoptions1  = array (
+     'show_feature_browser' => "Show the feature browser on the organism page. The browser loads when page loads. This may be slow for large sites.",
+     'hide_feature_browser' => "Hide the feature browser on the organism page. Disables the feature browser completely.",
+   );
+//   $allowedoptions ['allow_feature_browser'] = "Allow loading of the feature browsing through AJAX. For large sites the initial page load will be quick with the feature browser loading afterwards.";
+
+   $form['browser']['browse_features'] = array(
+      '#title' => 'Feature Browser on Organism Page',
+      '#description' => 'A feature browser can be added to an organism page to allow users to quickly '.
+         'access a feature.  This will most likely not be the ideal mechanism for accessing feature '.
+         'information, especially for large sites, but it will allow users exploring the site (such '.
+         'as students) to better understand the data types available on the site.',
+      '#type' => 'radios',
+      '#options' => $allowedoptions1,
+      '#default_value'=>variable_get('tripal_feature_browse_setting', 'show_feature_browser'),
+   );
+   $form['browser']['set_browse_button'] = array(
+      '#type' => 'submit',
+      '#value' => t('Set Browser'),
+      '#weight' => 2,
+   );
+
+   $form['summary'] = array(
+      '#type' => 'fieldset',
+      '#title' => t('Feature Summary')
+   );
+   $allowedoptions2 = array (
+      'show_feature_summary' => "Show the feature summary on the organism page. The summary loads when page loads.",
+      'hide_feature_summary' => "Hide the feature summary on the organism page. Disables the feature summary.",
+   );
+
+   $form['summary']['feature_summary'] = array(
+      '#title' => 'Feature Summary on Organism Page',
+      '#description' => 'A feature summary can be added to an organism page to allow users to see the '.
+         'type and quantity of features available for the organism.',
+      '#type' => 'radios',
+      '#options' => $allowedoptions2,
+      '#default_value'=>variable_get('tripal_feature_summary_setting', 'show_feature_summary'),
+   );
+   $form['summary']['set_summary_button'] = array(
+      '#type' => 'submit',
+      '#value' => t('Set Summary'),
+      '#weight' => 2,
+   );
+
+   // each helper appends its own fieldset to $form (passed by reference)
+   get_tripal_feature_admin_form_sync_set($form);
+   get_tripal_feature_admin_form_taxonomy_set($form);
+   get_tripal_feature_admin_form_reindex_set($form);
+   get_tripal_feature_admin_form_cleanup_set($form);
+
+   return system_settings_form($form);
+}
+
+/**
+ * Validation callback for the feature administration form.
+ *
+ * Compares the pressed button (the 'op' form value) against the labels of
+ * the module's own buttons. A job button queues the matching job through
+ * tripal_add_job(); the 'Set Browser' and 'Set Summary' buttons store the
+ * selected radio value with variable_set().
+ *
+ * @param $form
+ *   The form definition (not used here).
+ * @param $form_state
+ *   The form state; only its 'values' entry is read.
+ */
+function tripal_feature_admin_validate($form, &$form_state) {
+   global $user;  // jobs are queued under the current user's uid
+   $pressed = $form_state['values']['op'];
+   $no_args = array();
+
+   // each entry: button label, job title, job callback
+   $job_buttons = array(
+      array(t('Sync all Features'),
+            'Sync all features','tripal_feature_sync_features'),
+      array(t('Set/Reset Taxonomy for all feature nodes'),
+            'Set all feature taxonomy','tripal_features_set_taxonomy'),
+      array(t('Reindex all feature nodes'),
+            'Reindex all features','tripal_features_reindex'),
+      array(t('Clean up orphaned features'),
+            'Cleanup orphaned features','tripal_features_cleanup'),
+   );
+   foreach ($job_buttons as $entry) {
+      list($label,$title,$callback) = $entry;
+      if ($pressed == $label) {
+         tripal_add_job($title,'tripal_feature',$callback,$no_args,$user->uid);
+      }
+   }
+
+   // the two display preferences are saved directly rather than queued
+   if ($pressed == t('Set Browser')) {
+      $browse_choice = $form_state['values']['browse_features'];
+      variable_set('tripal_feature_browse_setting',$browse_choice);
+   }
+   if ($pressed == t('Set Summary')) {
+      $summary_choice = $form_state['values']['feature_summary'];
+      variable_set('tripal_feature_summary_setting',$summary_choice);
+   }
+}
+/**
+ * Adds the 'Clean Up' fieldset to the administration form.
+ *
+ * The fieldset holds an explanatory item and the submit button labelled
+ * 'Clean up orphaned features'.
+ *
+ * @param $form
+ *   The form array, passed by reference; the 'cleanup' key is set.
+ */
+function get_tripal_feature_admin_form_cleanup_set(&$form) {
+   // the whole fieldset is declared in one literal; children keep the
+   // order description, button
+   $form['cleanup'] = array(
+      '#type'  => 'fieldset',
+      '#title' => t('Clean Up'),
+      'description' => array(
+         '#type'   => 'item',
+         '#value'  => t("With Drupal and chado residing in different databases ".
+            "it is possible that nodes in Drupal and features in Chado become ".
+            "\"orphaned\".  This can occur if a feature node in Drupal is ".
+            "deleted but the corresponding chado feature is not and/or vice ".
+            "versa.  The Cleanup function will also remove nodes for features ".
+            "that are not in the list of allowed feature types as specified ".
+            "above.  This is helpful when a feature type needs to be ".
+            "removed but was previously present as Drupal nodes. ".
+            "Click the button below to resolve these discrepancies."),
+         '#weight' => 1,
+      ),
+      'button' => array(
+         '#type'   => 'submit',
+         '#value'  => t('Clean up orphaned features'),
+         '#weight' => 2,
+      ),
+   );
+}
+/**
+ * Adds the 'Reindex' fieldset to the administration form.
+ *
+ * The fieldset holds an explanatory item and the submit button labelled
+ * 'Reindex all feature nodes'.
+ *
+ * @param $form
+ *   The form array, passed by reference; the 'reindex' key is set.
+ */
+function get_tripal_feature_admin_form_reindex_set(&$form) {
+   // the whole fieldset is declared in one literal; children keep the
+   // order description, button
+   $form['reindex'] = array(
+      '#type'  => 'fieldset',
+      '#title' => t('Reindex'),
+      'description' => array(
+         '#type'   => 'item',
+         '#value'  => t("Reindexing of nodes is important when content for nodes ".
+            "is updated external to drupal, such as external uploads to chado. ".
+            "Features need to be reindexed to ensure that updates to features ".
+            "are searchable. Depending on the number of features this may take ".
+            "quite a while. Click the button below to begin reindexing of ".
+            "features."),
+         '#weight' => 1,
+      ),
+      'button' => array(
+         '#type'   => 'submit',
+         '#value'  => t('Reindex all feature nodes'),
+         '#weight' => 2,
+      ),
+   );
+}
+/**
+ * Adds the 'Set Taxonomy' fieldset to the administration form.
+ *
+ * The fieldset holds an explanatory item, a set of checkboxes for the
+ * classes of terms to assign, and the submit button labelled
+ * 'Set/Reset Taxonomy for all feature nodes'.
+ *
+ * @param $form
+ *   The form array, passed by reference; the 'taxonomy' key is set.
+ */
+function get_tripal_feature_admin_form_taxonomy_set (&$form) {
+
+   $form['taxonomy'] = array(
+      '#type' => 'fieldset',
+      '#title' => t('Set Taxonomy')
+   );
+
+   $form['taxonomy']['description'] = array(
+       '#type' => 'item',
+       '#value' => t("Drupal allows for assignment of \"taxonomy\" or ".
+          "categorical terms to nodes. These terms allow for advanced ".
+          "filtering during searching."),
+       '#weight' => 1,
+   );
+   $tax_options = array (
+      'organism' => t('Organism name'),
+      'feature_type'  => t('Feature Type (e.g. EST, mRNA, etc.)'),
+      'analysis' => t('Analysis Name'),
+      'library'  => t('Library Name'),
+   );
+   $form['taxonomy']['tax_classes'] = array (
+     '#title'       => t('Available Taxonomic Classes'),
+     // element types are machine names and must not be translated
+     '#type'        => 'checkboxes',
+     '#description' => t("Please select the class of terms to assign to ".
+        "chado features"),
+     '#required'    => FALSE,
+     '#prefix'      => '<div id="taxclass_boxes">',
+     '#suffix'      => '</div>',
+     '#options'     => $tax_options,
+     '#weight'      => 2,
+     // a checkboxes element takes an array of selected keys as its default
+     '#default_value' => variable_get('tax_classes',array()),
+   );
+   $form['taxonomy']['button'] = array(
+      '#type' => 'submit',
+      '#value' => t('Set/Reset Taxonomy for all feature nodes'),
+      '#weight' => 3,
+   );
+
+}
+/**
+ * Adds the 'Sync Features' fieldset to the administration form.
+ *
+ * The fieldset holds an explanatory item, a list of the organisms returned
+ * by tripal_organism_get_synced(), and the 'Sync all Features' button.
+ *
+ * @param $form
+ *   The form array, passed by reference; the 'sync' key is set.
+ */
+function get_tripal_feature_admin_form_sync_set (&$form) {
+
+   // define the fieldsets
+   $form['sync'] = array(
+      '#type' => 'fieldset',
+      '#title' => t('Sync Features')
+   );
+
+   $form['sync']['description'] = array(
+      '#type' => 'item',
+      '#value' => t("Click the 'Sync all Features' button to create Drupal ".
+         "content for features in chado. Only features of the types listed ".
+         "above in the Feature Types box will be synced. Depending on the ".
+         "number of features in the chado database this may take a long ".
+         "time to complete. "),
+      '#weight' => 1,
+   );
+
+   // get the list of organisms which will be synced.
+   // NOTE(review): assumes tripal_organism_get_synced() returns an array of
+   // objects with genus and species properties -- confirm against its
+   // definition.
+   $orgs = tripal_organism_get_synced();
+   $org_names = array();
+   if($orgs){
+      foreach($orgs as $org){
+         $org_names[] = "$org->genus $org->species";
+      }
+   }
+   // join with commas so the list carries no trailing separator
+   $org_list = implode(', ',$org_names);
+   $form['sync']['description2'] = array(
+      '#type' => 'item',
+      '#value' => "Only features for the following organisms will be synced: ".
+         " $org_list",
+      // distinct weight so this item reliably follows the description above
+      '#weight' => 2,
+   );
+
+   $form['sync']['button'] = array(
+      '#type' => 'submit',
+      '#value' => t('Sync all Features'),
+      '#weight' => 3,
+   );
+
+}

+ 465 - 0
tripal_feature/fasta_loader.php

@@ -0,0 +1,465 @@
+<?php
+
+/**
+ * Builds the form used to queue the import of a FASTA file.
+ *
+ * The organism and external database select lists are filled from the chado
+ * organism and db tables. The advanced fieldsets collect the regular
+ * expressions applied to each definition line, an optional external
+ * database cross reference and an optional relationship to existing
+ * features.
+ *
+ * @return
+ *   The form array.
+ */
+function tripal_feature_fasta_load_form (){
+
+   $form = array();
+   $form['fasta_file']= array(
+      '#type'          => 'textfield',
+      '#title'         => t('FASTA File'),
+      '#description'   => t('Please enter the full system path for the FASTA file, or a path within the Drupal
+                             installation (e.g. /sites/default/files/xyz.fasta).  The path must be accessible to the
+                             server on which this Drupal instance is running.'),
+      '#required' => TRUE,
+      '#weight'        => 1
+   );
+   // get the list of organisms
+   $sql = "SELECT * FROM {organism} ORDER BY genus, species";
+   $previous_db = tripal_db_set_active('chado');  // use chado database
+   $org_rset = db_query($sql);
+   tripal_db_set_active($previous_db);  // now use drupal database
+   $organisms = array();
+   $organisms[''] = '';
+   while($organism = db_fetch_object($org_rset)){
+      $organisms[$organism->organism_id] = "$organism->genus $organism->species ($organism->common_name)";
+   }
+   $form['organism_id'] = array (
+     '#title'       => t('Organism'),
+     // element types are machine names and must not be translated
+     '#type'        => 'select',
+     '#description' => t("Choose the organism to which these sequences are associated "),
+     '#required'    => TRUE,
+     '#options'     => $organisms,
+     '#weight'      => 2,
+   );
+   $form['type']= array(
+      '#type' => 'textfield',
+      '#title' => t('Sequence Type'),
+      '#required' => TRUE,
+      '#description' => t('Please enter the Sequence Ontology term that describes the sequences in the FASTA file.'),
+      '#weight' => 3
+   );
+
+   // NOTE: selecting a library is currently disabled, so no 'library_id'
+   // element is defined and the library table is not queried here.
+
+   $form['update']= array(
+      '#type' => 'checkbox',
+      '#title' => t('Insert and update'),
+      '#required' => FALSE,
+      '#description' => t('By default only new features are inserted.  Select this checkbox to update
+                           features that already exists with the contents from the FASTA file.'),
+      '#weight' => 6
+   );
+
+   // Advanced Options
+   $form['advanced'] = array(
+      '#type' => 'fieldset',
+      '#title' => t('Advanced Options'),
+      '#weight'=> 7,
+      // '#collapsed' only takes effect on a collapsible fieldset
+      '#collapsible' => TRUE,
+      '#collapsed' => TRUE
+   );
+   $form['advanced']['re_help']= array(
+      '#type' => 'item',
+      '#value' => t('A regular expression is an advanced method for extracting information from a string of text.
+                     By default, this loader will use the first word in the definition line for each sequence in the FASTA file
+                     as the uniquename for the sequences.  If this is not desired, you may use the following regular
+                     expressions to define the positions of the unique name.'),
+      '#weight' => 0
+   );
+   $form['advanced']['re_name']= array(
+      '#type' => 'textfield',
+      '#title' => t('Regular expression for the name'),
+      '#required' => FALSE,
+      '#description' => t('Enter the regular expression that will extract the feature name from the FASTA definition line. For example, for a definition line with a name and uniquename separated by a bar \'|\' (>seqname|uniquename), the regular expression would be, "^(.*?)\|.*$"'),
+      '#weight' => 1
+   );
+   $form['advanced']['re_uname']= array(
+      '#type' => 'textfield',
+      '#title' => t('Regular expression for the unique name'),
+      '#required' => FALSE,
+      '#description' => t('Enter the regular expression that will extract the unique feature name for each feature from the FASTA definition line.  This name must be unique for the organism.'),
+      '#weight' => 2
+   );
+
+   // Advanced database cross-reference options
+   $form['advanced']['db'] = array(
+      '#type' => 'fieldset',
+      '#title' => t('External Database Reference'),
+      '#weight'=> 6,
+      '#collapsible' => TRUE,
+      '#collapsed' => TRUE
+   );
+   $form['advanced']['db']['re_accession']= array(
+      '#type' => 'textfield',
+      '#title' => t('Regular expression for the accession'),
+      '#required' => FALSE,
+      '#description' => t('Enter the regular expression that will extract the accession for the external database for each feature from the FASTA definition line.'),
+      '#weight' => 2
+   );
+
+   // get the list of databases
+   $sql = "SELECT * FROM {db} ORDER BY name";
+   $previous_db = tripal_db_set_active('chado');  // use chado database
+   $db_rset = db_query($sql);
+   tripal_db_set_active($previous_db);  // now use drupal database
+   $dbs = array();
+   $dbs[''] = '';
+   while($db = db_fetch_object($db_rset)){
+      $dbs[$db->db_id] = "$db->name";
+   }
+   $form['advanced']['db']['db_id'] = array (
+     '#title'       => t('External Database'),
+     '#type'        => 'select',
+     '#description' => t("Please choose an external database for which these sequences have a cross reference."),
+     '#required'    => FALSE,
+     '#options'     => $dbs,
+     '#weight'      => 1,
+   );
+
+   $form['advanced']['relationship'] = array(
+      '#type' => 'fieldset',
+      '#title' => t('Relationships'),
+      // distinct weight so this fieldset reliably follows the database one
+      '#weight'=> 7,
+      '#collapsible' => TRUE,
+      '#collapsed' => TRUE
+   );
+   $rels = array();
+   $rels[''] = '';
+   $rels['part_of'] = 'part of';
+   $rels['derives_from'] = 'produced by';
+
+
+   // Advanced references options
+   $form['advanced']['relationship']['rel_type']= array(
+     '#title'       => t('Relationship Type'),
+     '#type'        => 'select',
+     '#description' => t("Use this option to create associations, or relationships between the
+                          features of this FASTA file and existing features in the database. For
+                          example, to associate a FASTA file of peptides to existing genes or transcript sequence,
+                          select the type 'produced by'. For a CDS sequences select the type 'part of'"),
+     '#required'    => FALSE,
+     '#options'     => $rels,
+     '#weight'      => 5,
+   );
+   $form['advanced']['relationship']['re_subject']= array(
+      '#type' => 'textfield',
+      '#title' => t('Regular expression for the parent'),
+      '#required' => FALSE,
+      '#description' => t('Enter the regular expression that will extract the unique
+                           name needed to identify the existing sequence for which the
+                           relationship type selected above will apply.'),
+      '#weight' => 6
+   );
+   $form['advanced']['relationship']['parent_type']= array(
+      '#type' => 'textfield',
+      '#title' => t('Parent Type'),
+      '#required' => FALSE,
+      '#description' => t('Please enter the Sequence Ontology term for the parent.  For example
+                           if the FASTA file being loaded is a set of proteins that are
+                           products of genes, then use the SO term \'gene\' or \'transcript\' or equivalent. However,
+                           this type must match the type for already loaded features.'),
+      '#weight' => 7
+   );
+
+   $form['button'] = array(
+      '#type' => 'submit',
+      '#value' => t('Import FASTA file'),
+      '#weight' => 10,
+   );
+   return $form;
+}
+/**
+ * Validation callback for the FASTA import form.
+ *
+ * Checks that the file exists (first relative to the Drupal installation,
+ * then as given), that the relationship and cross-reference field groups
+ * are each either complete or empty, and that the sequence type (and the
+ * parent type when a relationship is chosen) is a term or synonym in the
+ * 'sequence' vocabulary. The resolved file path is stored in
+ * $form_state['storage']['dfile'] for the submit handler.
+ *
+ * @param $form
+ *   The form definition (not used here).
+ * @param $form_state
+ *   The form state; 'values' is read and 'storage' is written.
+ */
+function tripal_feature_fasta_load_form_validate($form, &$form_state){
+   $values       = $form_state['values'];
+   $fasta_file   = trim($values['fasta_file']);
+   $type         = trim($values['type']);
+   $re_accession = trim($values['re_accession']);
+   $db_id        = $values['db_id'];
+   $rel_type     = $values['rel_type'];
+   $re_subject   = trim($values['re_subject']);
+   $parent_type  = trim($values['parent_type']);
+
+   // check to see if the file is located local to Drupal
+   $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $fasta_file;
+   if(!file_exists($dfile)){
+      // if not local to Drupal, the file must be someplace else, just use
+      // the full path provided
+      $dfile = $fasta_file;
+   }
+   if(!file_exists($dfile)){
+      form_set_error('fasta_file',t("Cannot find the file on the system"));
+   }
+
+   // make sure if a relationship is specified that all fields are provided.
+   if(($rel_type or $parent_type) and !$re_subject){
+      form_set_error('re_subject',t("Please provide a regular expression for the parent"));
+   }
+   if(($rel_type or $re_subject) and !$parent_type){
+      form_set_error('parent_type',t("Please provide a SO term for the parent"));
+   }
+   if(($parent_type or $re_subject) and !$rel_type){
+      form_set_error('rel_type',t("Please select a relationship type"));
+   }
+
+
+   // make sure if a database is specified that all fields are provided
+   if($db_id and !$re_accession){
+      form_set_error('re_accession',t("Please provide a regular expression for the accession"));
+   }
+   if($re_accession and !$db_id){
+      form_set_error('db_id',t("Please select a database"));
+   }
+
+   // check to make sure the types exists.  The cvterm tables live in chado,
+   // so switch to the chado database for the lookups and switch back before
+   // reporting any errors.
+   $cvtermsql = "SELECT CVT.cvterm_id
+                 FROM {cvterm} CVT
+                    INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
+                    LEFT JOIN {cvtermsynonym} CVTS on CVTS.cvterm_id = CVT.cvterm_id
+                 WHERE cv.name = '%s' and (CVT.name = '%s' or CVTS.synonym = '%s')";
+   $previous_db = tripal_db_set_active('chado');
+   $type_term = db_fetch_object(db_query($cvtermsql,'sequence',$type,$type));
+   $parent_term = NULL;
+   if($rel_type){
+      $parent_term = db_fetch_object(db_query($cvtermsql,'sequence',$parent_type,$parent_type));
+   }
+   tripal_db_set_active($previous_db);
+
+   if(!$type_term){
+      form_set_error('type',t("The Sequence Ontology (SO) term selected for the sequence type is not available in the database. Please check spelling or select another."));
+   }
+   if($rel_type and !$parent_term){
+      form_set_error('parent_type',t("The Sequence Ontology (SO) term selected for the parent relationship is not available in the database. Please check spelling or select another."));
+   }
+
+   // remember the resolved path for the submit handler
+   $form_state['storage']['dfile'] = $dfile;
+}
+/**
+ * Submit callback for the FASTA import form.
+ *
+ * Collects the form values and queues a job that calls
+ * tripal_feature_load_fasta() with them.
+ *
+ * @param $form
+ *   The form definition (not used here).
+ * @param $form_state
+ *   The form state; 'values' and 'storage' are read.
+ */
+function tripal_feature_fasta_load_form_submit ($form, &$form_state){
+   global $user;
+
+   $values       = $form_state['values'];
+   $dfile        = $form_state['storage']['dfile'];
+   $organism_id  = $values['organism_id'];
+   $type         = trim($values['type']);
+   $update       = trim($values['update']);
+   // the form does not currently define a library_id element, so fall back
+   // to NULL instead of reading an undefined index
+   $library_id   = isset($values['library_id']) ? $values['library_id'] : NULL;
+   $re_name      = trim($values['re_name']);
+   $re_uname     = trim($values['re_uname']);
+   $re_accession = trim($values['re_accession']);
+   $db_id        = $values['db_id'];
+   $rel_type     = $values['rel_type'];
+   $re_subject   = trim($values['re_subject']);
+   $parent_type  = trim($values['parent_type']);
+
+   // the order here must match the parameter list of tripal_feature_load_fasta()
+   $args = array($dfile,$organism_id,$type,$library_id,$re_name,$re_uname,
+            $re_accession,$db_id,$rel_type,$re_subject,$parent_type,$update,$user->uid);
+
+   // NOTE(review): the job is registered under 'tripal_core' although the
+   // callback belongs to this module -- confirm this is intended.
+   tripal_add_job("Import FASTA file: $dfile",'tripal_core',
+      'tripal_feature_load_fasta',$args,$user->uid);
+}
+/**
+ * Loads the sequences of a FASTA file into the feature table.
+ *
+ * Each definition line (a line starting with '>') starts a new record; the
+ * lines that follow are concatenated into its residues. The name, unique
+ * name, accession and relationship parent are taken from the first capture
+ * group of the corresponding regular expression. Without an expression the
+ * name and unique name default to the first word of the definition line and
+ * the accession and parent are left empty.
+ *
+ * The regular expressions are placed between '/' delimiters, so a literal
+ * '/' inside an expression must already be escaped.
+ *
+ * @param $dfile         path of the FASTA file
+ * @param $organism_id   organism_id the features belong to
+ * @param $type          sequence ontology term for the features
+ * @param $library_id    passed on to the insert function
+ * @param $re_name       regular expression for the name, or empty
+ * @param $re_uname      regular expression for the unique name, or empty
+ * @param $re_accession  regular expression for the accession, or empty
+ * @param $db_id         db_id of the external database, or empty
+ * @param $rel_type      relationship type, or empty
+ * @param $re_subject    regular expression for the parent unique name, or empty
+ * @param $parent_type   sequence ontology term of the parent
+ * @param $update        TRUE to update features that already exist
+ * @param $uid           not used by this function
+ * @param $job           when set, passed to tripal_job_set_progress()
+ *
+ * @return
+ *   Always an empty string.
+ */
+function tripal_feature_load_fasta($dfile, $organism_id, $type,
+   $library_id, $re_name, $re_uname, $re_accession, $db_id, $rel_type,
+   $re_subject, $parent_type, $update,$uid, $job = NULL)
+{
+
+   print "Opening FASTA file $dfile\n";
+
+   // lines keep their trailing newline, which lets the default first-word
+   // pattern match a definition line that consists of a single word
+   $lines = file($dfile,FILE_SKIP_EMPTY_LINES);
+   if($lines === FALSE){
+      print "ERROR: cannot read the FASTA file $dfile\n";
+      return '';
+   }
+
+   $num_lines = sizeof($lines);
+   // never let the interval be zero (files shorter than 100 lines)
+   $interval = max(1,intval($num_lines * 0.01));
+
+   $name = '';
+   $uname = '';
+   $accession = '';
+   $subject = '';
+   $residues = '';
+   $have_record = FALSE;  // becomes TRUE once the first definition line is seen
+   $i = 0;
+   foreach ($lines as $line) {
+      $i++;  // update the line count
+
+      // update the job status every 1% of the lines
+      if($job and $i % $interval == 0){
+         tripal_job_set_progress($job,intval(($i/$num_lines)*100));
+      }
+
+      if(!preg_match('/^>/',$line)){
+         $residues .= trim($line);
+         continue;
+      }
+
+      // a definition line closes the previous record, so store that one first
+      if($have_record){
+         _tripal_feature_fasta_loader_store_record($name,$uname,$db_id,
+            $accession,$subject,$rel_type,$parent_type,$library_id,
+            $organism_id,$type,$residues,$update);
+      }
+      $have_record = TRUE;
+      $residues = '';  // always start clean, even if the last record was skipped
+
+      // get the name, uniquename, accession and relationship subject from
+      // the definition line
+      $line = preg_replace("/^>/",'',$line);
+      $first_word = _tripal_feature_fasta_loader_extract('^(.*?)[\s\|].*$',$line);
+      if($re_name){
+         $name = _tripal_feature_fasta_loader_extract($re_name,$line);
+         if($name === ''){
+            print "Regular expression for the feature name finds nothing\n";
+         }
+      } else {
+         $name = $first_word;
+      }
+      $uname     = $re_uname     ? _tripal_feature_fasta_loader_extract($re_uname,$line)     : $first_word;
+      $accession = $re_accession ? _tripal_feature_fasta_loader_extract($re_accession,$line) : '';
+      $subject   = $re_subject   ? _tripal_feature_fasta_loader_extract($re_subject,$line)   : '';
+   }
+
+   // the last record has no following definition line to trigger its insert
+   if($have_record){
+      _tripal_feature_fasta_loader_store_record($name,$uname,$db_id,
+         $accession,$subject,$rel_type,$parent_type,$library_id,
+         $organism_id,$type,$residues,$update);
+   }
+
+   return '';
+}
+/**
+ * Returns the trimmed first capture group of $regex in $line, or '' when the
+ * expression does not match or has no first group.
+ */
+function _tripal_feature_fasta_loader_extract($regex,$line){
+   if(preg_match("/$regex/",$line,$matches) and isset($matches[1])){
+      return trim($matches[1]);
+   }
+   return '';
+}
+/**
+ * Inserts one parsed FASTA record, skipping it with a warning when its name
+ * or unique name is empty.
+ */
+function _tripal_feature_fasta_loader_store_record($name,$uname,$db_id,
+   $accession,$subject,$rel_type,$parent_type,$library_id,$organism_id,$type,
+   $residues,$update)
+{
+   if($name === '' or $uname === ''){
+      print "WARNING: skipping a sequence because its name or unique name could not be determined\n";
+      return;
+   }
+   // the FASTA loader has no source value, so an empty one is passed
+   tripal_feature_fasta_loader_insert_feature($name,$uname,$db_id,
+      $accession,$subject,$rel_type,$parent_type,$library_id,$organism_id,$type,
+      '',$residues,$update);
+}
+/**
+ * Inserts (or updates) one feature in chado, then adds its database cross
+ * reference and its relationship to a parent feature when requested.
+ *
+ * A feature is identified by organism_id, uniquename and type_id. The chado
+ * database is made active for the duration of the call and the previously
+ * active database is restored on every exit path.
+ *
+ * @param $name         feature name
+ * @param $uname        feature unique name
+ * @param $db_id        db_id for the cross reference, or empty for none
+ * @param $accession    accession for the cross reference
+ * @param $parent       unique name of the parent feature
+ * @param $rel_type     relationship type term, or empty for none
+ * @param $parent_type  sequence ontology term of the parent feature
+ * @param $library_id   not used by this function
+ * @param $organism_id  organism_id of the feature (and of the parent)
+ * @param $type         sequence ontology term of the feature
+ * @param $source       not used by this function
+ * @param $residues     the sequence
+ * @param $update       TRUE to update a feature that already exists
+ *
+ * @return
+ *   0 when the feature could not be stored; nothing otherwise.
+ */
+function tripal_feature_fasta_loader_insert_feature($name,$uname,$db_id,$accession,
+              $parent,$rel_type,$parent_type,$library_id,$organism_id,$type, 
+              $source,$residues,$update) 
+{
+   // all work happens in a helper so the database is switched back exactly
+   // once, no matter where the helper returns
+   $previous_db = tripal_db_set_active('chado');
+   $status = _tripal_feature_fasta_loader_insert_feature_chado($name,$uname,
+      $db_id,$accession,$parent,$rel_type,$parent_type,$organism_id,$type,
+      $residues,$update);
+   tripal_db_set_active($previous_db);
+   return $status;
+}
+/**
+ * Does the work of tripal_feature_fasta_loader_insert_feature(); expects the
+ * chado database to be active already.
+ */
+function _tripal_feature_fasta_loader_insert_feature_chado($name,$uname,$db_id,
+   $accession,$parent,$rel_type,$parent_type,$organism_id,$type,$residues,$update)
+{
+   // first get the type for this sequence
+   $cvtermsql = "SELECT CVT.cvterm_id
+                 FROM {cvterm} CVT
+                    INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
+                    LEFT JOIN {cvtermsynonym} CVTS on CVTS.cvterm_id = CVT.cvterm_id
+                 WHERE cv.name = '%s' and (CVT.name = '%s' or CVTS.synonym = '%s')";
+   $cvterm = db_fetch_object(db_query($cvtermsql,'sequence',$type,$type));
+   if(!$cvterm){
+      print "ERROR: cannot find the term type: '$type'\n";
+      return 0;
+   }
+
+   // check to see if this feature already exists
+   $feature_sql = "SELECT * FROM {feature} 
+                   WHERE organism_id = %d and uniquename = '%s' and type_id = %d";
+   $feature = db_fetch_object(db_query($feature_sql,$organism_id,$uname,$cvterm->cvterm_id));
+   if(!$feature){
+      // now insert the feature
+      $sql = "INSERT INTO {feature} (organism_id, name, uniquename, residues, seqlen, md5checksum,type_id,is_analysis,is_obsolete)
+              VALUES(%d,'%s','%s','%s',%d, '%s', %d, %s, %s)";
+      $result = db_query($sql,$organism_id,$name,$uname,$residues,strlen($residues),
+                  md5($residues),$cvterm->cvterm_id,'false','false');
+      if(!$result){
+         print "ERROR: failed to insert feature '$name ($uname)'\n";
+         return 0;
+      }
+      print "Inserted feature $name ($uname)\n";
+   } elseif($update){
+      // seqlen is an integer column, so it uses the %d placeholder
+      $sql = "UPDATE {feature} 
+               SET name = '%s', residues = '%s', seqlen = %d, md5checksum = '%s'
+               WHERE organism_id = %d and uniquename = '%s' and type_id = %d";
+      $result = db_query($sql,$name,$residues,strlen($residues),md5($residues),$organism_id,$uname,$cvterm->cvterm_id);
+      if(!$result){
+         print "ERROR: failed to update feature '$name ($uname)'\n";
+         return 0;
+      }
+      print "Updated feature $name ($uname)\n";
+   } else {
+      print "WARNING: feature already exists, skipping: '$name ($uname)'\n";
+   }
+
+   // now get the feature
+   $feature = db_fetch_object(db_query($feature_sql,$organism_id,$uname,$cvterm->cvterm_id));
+   if(!$feature){
+      print "Something bad has happened: $organism_id, $uname, $cvterm->cvterm_id\n";
+      return 0;
+   }
+
+   // now add the database cross reference
+   if($db_id){
+      // check to see if this accession reference exists, if not add it
+      $dbxrefsql = "SELECT * FROM {dbxref} WHERE db_id = %d and accession = '%s'";
+      $dbxref = db_fetch_object(db_query($dbxrefsql,$db_id,$accession));
+      if(!$dbxref){
+         $sql = "INSERT INTO {dbxref} (db_id,accession) VALUES (%d,'%s')";
+         $result = db_query($sql,$db_id,$accession);
+         if(!$result){
+           print "WARNING: could not add external database acession: '$name accession: $accession'\n";
+         }
+         $dbxref = db_fetch_object(db_query($dbxrefsql,$db_id,$accession));
+      }
+
+      // only link the feature when the dbxref record really exists
+      if($dbxref){
+         // check to see if the feature dbxref record exists if not, then add it 
+         $fdbxrefsql = "SELECT * FROM {feature_dbxref} WHERE feature_id = %d and dbxref_id = %d";
+         $fdbxref = db_fetch_object(db_query($fdbxrefsql,$feature->feature_id,$dbxref->dbxref_id));
+         if(!$fdbxref){
+            $sql = "INSERT INTO {feature_dbxref} (feature_id,dbxref_id) VALUES (%d,%d)";
+            $result = db_query($sql,$feature->feature_id,$dbxref->dbxref_id);
+            if(!$result){
+               print "WARNING: could not associate database cross reference with feature: '$name accession: $accession'\n";
+            } else {
+               print "Added database crossreference $name ($uname) -> $accession\n";
+            }
+         }
+      }
+   }
+
+   // now add in the relationship if one exists.  First, get the parent type for the relationship
+   // then get the parent feature 
+   if($rel_type){
+      $parentcvterm = db_fetch_object(db_query($cvtermsql,'sequence',$parent_type,$parent_type));
+      $relcvterm = db_fetch_object(db_query($cvtermsql,'relationship',$rel_type,$rel_type));
+      $parent_feature = NULL;
+      if($parentcvterm and $relcvterm){
+         $parent_feature = db_fetch_object(db_query($feature_sql,$organism_id,$parent,$parentcvterm->cvterm_id));
+      }
+      if(!$parentcvterm or !$relcvterm){
+         print "WARNING: cannot establish relationship '$uname' ($type) $rel_type '$parent' ($parent_type): Cannot find the relationship type or the parent type\n";
+      }
+      elseif(!$parent_feature){
+         print "WARNING: cannot establish relationship '$uname' ($type) $rel_type '$parent' ($parent_type): Cannot find the parent\n";
+      }
+      else {
+         // check to see if the relationship already exists
+         $sql = "SELECT * FROM {feature_relationship} WHERE subject_id = %d and object_id = %d and type_id = %d";
+         $rel = db_fetch_object(db_query($sql,$feature->feature_id,$parent_feature->feature_id,$relcvterm->cvterm_id));
+         if($rel){
+            print "WARNING: relationship already exists, skipping '$uname' ($type) $rel_type '$parent' ($parent_type)\n";
+         } else {
+            $sql = "INSERT INTO {feature_relationship} (subject_id,object_id,type_id)
+                    VALUES (%d,%d,%d)";
+            $result = db_query($sql,$feature->feature_id,$parent_feature->feature_id,$relcvterm->cvterm_id);
+            if(!$result){
+               print "WARNING: failed to insert feature relationship '$uname' ($type) $rel_type '$parent' ($parent_type)\n";
+            } else {
+               print "Inserted relationship relationship: '$uname' ($type) $rel_type '$parent' ($parent_type)\n";
+            }
+         }
+      }
+   }
+}
+

+ 10 - 6
tripal_feature/syncFeatures.php

@@ -1,8 +1,6 @@
 <?php
 
-//
-// Copyright 2009 Clemson University
-//
+
 # This script can be run as a stand-alone script to sync all the features from chado to drupal
 // Parameter f specifies the feature_id to sync
 // -f 0 will sync all features 
@@ -118,11 +116,11 @@ function tripal_feature_sync_features ($max_sync = 0, $job_id = NULL){
 }
 
 function tripal_feature_sync_feature ($feature_id){
-   print "\tfeature $feature_id\n";
+//   print "\tfeature $feature_id\n";
 
    $mem = memory_get_usage(TRUE);
    $mb = $mem/1048576;
-   print "$mb mb\n";
+//   print "$mb mb\n";
 
    global $user;
    $create_node = 1;   // set to 0 if the node exists and we just sync and not create
@@ -197,11 +195,17 @@ function tripal_feature_sync_feature ($feature_id){
    // create one.  Note that the node_save call in this block
    // will call the hook_submit function which
    if($create_node){
+      // get the organism for this feature
+      $sql = "SELECT * FROM {organism} WHERE organism_id = %d";
+      $organism = db_fetch_object(db_query($sql,$feature->organism_id));
+
       drupal_set_message(t("$feature_id: Creating node $feature->name"));
       $new_node = new stdClass();
       $new_node->type = 'chado_feature';
       $new_node->uid = $user->uid;
-      $new_node->title = "$feature->name";
+      $new_node->title = "$feature->uniquename ($feature->cvname) $organism->genus $organism->species";
+      $new_node->name = "$feature->name";
+      $new_node->uniquename = "$feature->uniquename";
       $new_node->feature_id = $feature->feature_id;
       $new_node->residues = $feature->residues;
       $new_node->organism_id = $feature->organism_id;

+ 133 - 314
tripal_feature/tripal_feature.module

@@ -1,11 +1,9 @@
 <?php
 
-//
-// Copyright 2009 Clemson University
-//
-
+require_once "admin.php";
 require_once "syncFeatures.php";
 require_once "indexFeatures.php";
+require_once "fasta_loader.php";
 
 /*************************************************************************
 *
@@ -29,289 +27,6 @@ function tripal_feature_views_api() {
    );
 }
 
-/************************************************************************
- * Builds the administrative settings form for the tripal_feature module.
- *
- * When tripal_get_module_active_jobs('tripal_feature') reports no active
- * jobs the form contains:
- *   - 'chado_feature_accession_prefix': text field, default 'ID'.
- *   - 'chado_feature_types': text area, default 'EST contig'.
- *   - 'browser' fieldset: radios ('browse_features') defaulting to the
- *     'tripal_feature_browse_setting' variable plus a 'Set Browser' button.
- *   - 'summary' fieldset: radios ('feature_summary') defaulting to the
- *     'tripal_feature_summary_setting' variable plus a 'Set Summary' button.
- *   - the sync, taxonomy, reindex and cleanup fieldsets appended by the
- *     get_tripal_feature_admin_form_*_set() helpers (in that order).
- * When jobs are active only a 'notice' fieldset with an explanatory
- * message is added instead.
- *
- * NOTE(review): the '#description' of 'chado_feature_types' and the
- * '#value' of the notice message are single-quoted strings that contain
- * double-quote/dot sequences and line breaks, so those characters are part
- * of the literal text rather than string concatenation -- looks unintended,
- * confirm and fix on the live copy of this function.
- * NOTE(review): '#type' for the accession prefix is passed through t();
- * an element type presumably should not be translated -- confirm.
- *
- * @return the $form array wrapped by system_settings_form()
- */
-function tripal_feature_admin () {
-
-   // before proceeding check to see if we have any
-   // currently processing jobs. If so, we don't want
-   // to give the opportunity to sync libraries
-   $active_jobs = FALSE;
-   if(tripal_get_module_active_jobs('tripal_feature')){
-      $active_jobs = TRUE;
-   }
-   if(!$active_jobs){
-
-      $form['chado_feature_accession_prefix'] = array (
-         '#title'       => t('Accession Prefix'),
-         '#type'        => t('textfield'),
-         '#description' => t("Accession numbers for features consist of the ".
-            "chado feature_id and a site specific prefix.  Set the prefix that ".
-            "will be incorporated in front of each feature_id to form a unique ".
-            "accession number for this site."),
-         '#required'    => TRUE,
-         '#default_value' => variable_get('chado_feature_accession_prefix','ID'),
-      );
-
-      $form['chado_feature_types'] = array(
-         '#title'       => t('Feature Types'),
-         '#type'        => 'textarea',
-         '#description' => t('Enter the names of the sequence types that the ".
-            "site will support with independent pages.  Pages for these data ".
-            "types will be built automatically for features that exist in the ".
-            "chado database.  The names listed here should be spearated by ".
-            "spaces or entered separately on new lines. The names must match ".
-            "exactly (spelling and case) with terms in the sequence ontology'),
-         '#required'    => TRUE,
-         '#default_value' => variable_get('chado_feature_types','EST contig'),
-      );
-
-      $form['browser'] = array(
-         '#type' => 'fieldset',
-         '#title' => t('Feature Browser')
-      );
-      $allowedoptions1  = array (
-        'show_feature_browser' => "Show the feature browser on the organism page. The browser loads when page loads. This may be slow for large sites.",
-        'hide_feature_browser' => "Hide the feature browser on the organism page. Disables the feature browser completely.",
-      );
-//      $allowedoptions ['allow_feature_browser'] = "Allow loading of the feature browsing through AJAX. For large sites the initial page load will be quick with the feature browser loading afterwards.";
-
-      $form['browser']['browse_features'] = array(
-         '#title' => 'Feature Browser on Organism Page',
-         '#description' => 'A feature browser can be added to an organism page to allow users to quickly '. 
-            'access a feature.  This will most likely not be the ideal mechanism for accessing feature '.
-            'information, especially for large sites, but it will alow users exploring the site (such '.
-            'as students) to better understand the data types available on the site.',
-         '#type' => 'radios',
-         '#options' => $allowedoptions1,
-         '#default_value'=>variable_get('tripal_feature_browse_setting', 'show_feature_browser'),
-      );
-      $form['browser']['set_browse_button'] = array(
-         '#type' => 'submit',
-         '#value' => t('Set Browser'),
-         '#weight' => 2,
-      );
-
-      $form['summary'] = array(
-         '#type' => 'fieldset',
-         '#title' => t('Feature Summary')
-      );
-      $allowedoptions2 ['show_feature_summary'] = "Show the feature summary on the organism page. The summary loads when page loads.";
-      $allowedoptions2 ['hide_feature_summary'] = "Hide the feature summary on the organism page. Disables the feature summary.";
-
-      $form['summary']['feature_summary'] = array(
-         '#title' => 'Feature Summary on Organism Page',
-         '#description' => 'A feature summary can be added to an organism page to allow users to see the '.
-            'type and quantity of features available for the organism.',
-         '#type' => 'radios',
-         '#options' => $allowedoptions2,
-         '#default_value'=>variable_get('tripal_feature_summary_setting', 'show_feature_summary'),
-      );
-      $form['summary']['set_summary_button'] = array(
-         '#type' => 'submit',
-         '#value' => t('Set Summary'),
-         '#weight' => 2,
-      );
-
-      get_tripal_feature_admin_form_sync_set($form);
-      get_tripal_feature_admin_form_taxonomy_set($form);
-      get_tripal_feature_admin_form_reindex_set($form);
-      get_tripal_feature_admin_form_cleanup_set($form);
-   } else {
-      $form['notice'] = array(
-         '#type' => 'fieldset',
-         '#title' => t('Feature Management Temporarily Unavailable')
-      );
-      $form['notice']['message'] = array(
-         '#value' => t('Currently, feature management jobs are waiting or ".
-            "are running. Managemment features have been hidden until these ".
-            "jobs complete.  Please check back later once these jobs have ".
-            "finished.  You can view the status of pending jobs in the Tripal ".
-            "jobs page.'),
-      );
-   }
-   return system_settings_form($form);
-}
-
-/************************************************************************
- * Reacts to whichever button submitted the feature admin form.
- *
- * The submitted button label ($form_state['values']['op']) is compared
- * against each known button:
- *   - 'Sync all Features', 'Set/Reset Taxonomy for all feature nodes',
- *     'Reindex all feature nodes' and 'Clean up orphaned features' each
- *     queue a job through tripal_add_job() with an empty argument list
- *     and the current user's uid.
- *   - 'Set Browser' stores the 'browse_features' value in the
- *     'tripal_feature_browse_setting' variable.
- *   - 'Set Summary' stores the 'feature_summary' value in the
- *     'tripal_feature_summary_setting' variable.
- * No form errors are raised here; despite the _validate suffix the
- * function only performs these side effects.
- * NOTE(review): presumably invoked by Drupal as the validate handler of
- * tripal_feature_admin() based on its name -- confirm.
- *
- * @param form the form structure (not read here)
- * @param form_state the form state; only ['values'] is read
- */
-function tripal_feature_admin_validate($form, &$form_state) {
-   global $user;  // we need access to the user info
-   $job_args = array();
-
-   // if the user wants to sync up the chado features then
-   // add the job to the management queue
-   if ($form_state['values']['op'] == t('Sync all Features')) {
-      tripal_add_job('Sync all features','tripal_feature',
-         'tripal_feature_sync_features',$job_args,$user->uid);
-   }
-
-   if ($form_state['values']['op'] == t('Set/Reset Taxonomy for all feature nodes')) {
-      tripal_add_job('Set all feature taxonomy','tripal_feature',
-         'tripal_features_set_taxonomy',$job_args,$user->uid);
-   }
-
-   if ($form_state['values']['op'] == t('Reindex all feature nodes')) {
-      tripal_add_job('Reindex all features','tripal_feature',
-         'tripal_features_reindex',$job_args,$user->uid);
-   }
-
-   if ($form_state['values']['op'] == t('Clean up orphaned features')) {
-      tripal_add_job('Cleanup orphaned features','tripal_feature',
-         'tripal_features_cleanup',$job_args,$user->uid);
-   }
-
-   if ($form_state['values']['op'] == t('Set Browser')) {
-      variable_set('tripal_feature_browse_setting',$form_state['values']['browse_features']);
-   }
-
-   if ($form_state['values']['op'] == t('Set Summary')) {
-      variable_set('tripal_feature_summary_setting',$form_state['values']['feature_summary']);
-   }
-}
-/************************************************************************
- * Adds the 'Clean Up' fieldset to the feature admin form.
- *
- * The fieldset ($form['cleanup']) holds an explanatory 'item' element
- * (weight 1) and a 'Clean up orphaned features' submit button (weight 2).
- * The button label is the value tripal_feature_admin_validate() compares
- * against to queue the cleanup job.
- *
- * @param form the form array, modified in place (passed by reference)
- */
-function get_tripal_feature_admin_form_cleanup_set(&$form) {
-   $form['cleanup'] = array(
-      '#type' => 'fieldset',
-      '#title' => t('Clean Up')
-   );
-   $form['cleanup']['description'] = array(
-       '#type' => 'item',
-       '#value' => t("With Drupal and chado residing in different databases ".
-          "it is possible that nodes in Drupal and features in Chado become ".
-          "\"orphaned\".  This can occur if a feature node in Drupal is ".
-          "deleted but the corresponding chado feature is not and/or vice ".
-          "versa.  The Cleanup function will also remove nodes for features ".
-          "that are not in the list of allowed feature types as specified ".
-          "above.  This is helpful when a feature type needs to be ".
-          "removed but was previously present as Drupal nodes. ".
-          "Click the button below to resolve these discrepancies."),
-       '#weight' => 1,
-   );
-   $form['cleanup']['button'] = array(
-      '#type' => 'submit',
-      '#value' => t('Clean up orphaned features'),
-      '#weight' => 2,
-   );
-}
-/************************************************************************
- * Adds the 'Reindex' fieldset to the feature admin form.
- *
- * The fieldset ($form['reindex']) holds an explanatory 'item' element
- * (weight 1) and a 'Reindex all feature nodes' submit button (weight 2).
- * The button label is the value tripal_feature_admin_validate() compares
- * against to queue the reindex job.
- *
- * @param form the form array, modified in place (passed by reference)
- */
-function get_tripal_feature_admin_form_reindex_set(&$form) {
-   $form['reindex'] = array(
-      '#type' => 'fieldset',
-      '#title' => t('Reindex')
-   );
-   $form['reindex']['description'] = array(
-       '#type' => 'item',
-       '#value' => t("Reindexing of nodes is important when content for nodes ".
-          "is updated external to drupal, such as external uploads to chado. ".
-          "Features need to be reindexed to ensure that updates to features ".
-          "are searchable. Depending on the number of features this may take ".
-          "quite a while. Click the button below to begin reindexing of ".
-          "features."),
-       '#weight' => 1,
-   );
-   $form['reindex']['button'] = array(
-      '#type' => 'submit',
-      '#value' => t('Reindex all feature nodes'),
-      '#weight' => 2,
-   );
-}
-/************************************************************************
- * Adds the 'Set Taxonomy' fieldset to the feature admin form.
- *
- * The fieldset ($form['taxonomy']) holds:
- *   - an explanatory 'item' element (weight 1);
- *   - 'tax_classes' checkboxes (weight 2) offering the keys 'organism',
- *     'feature_type', 'analysis' and 'library', defaulting to the
- *     'tax_classes' variable and wrapped in a div with id taxclass_boxes;
- *   - a 'Set/Reset Taxonomy for all feature nodes' submit button
- *     (weight 3), the label tripal_feature_admin_validate() compares
- *     against to queue the taxonomy job.
- *
- * NOTE(review): '#type' for 'tax_classes' is passed through t(); an
- * element type presumably should not be translated -- confirm.
- *
- * @param form the form array, modified in place (passed by reference)
- */
-function get_tripal_feature_admin_form_taxonomy_set (&$form) {
-
-
-   $form['taxonomy'] = array(
-      '#type' => 'fieldset',
-      '#title' => t('Set Taxonomy')
-   );
-
-   $form['taxonomy']['description'] = array(
-       '#type' => 'item',
-       '#value' => t("Drupal allows for assignment of \"taxonomy\" or ".
-          "catagorical terms to nodes. These terms allow for advanced ".
-          "filtering during searching."),
-       '#weight' => 1,
-   );
-   $tax_options = array (
-      'organism' => t('Organism name'),
-      'feature_type'  => t('Feature Type (e.g. EST, mRNA, etc.)'),
-      'analysis' => t('Analysis Name'),
-      'library'  => t('Library Name'),
-   );
-   $form['taxonomy']['tax_classes'] = array (
-     '#title'       => t('Available Taxonomic Classes'),
-     '#type'        => t('checkboxes'),
-     '#description' => t("Please select the class of terms to assign to ".
-        "chado features"),
-     '#required'    => FALSE,
-     '#prefix'      => '<div id="taxclass_boxes">',
-     '#suffix'      => '</div>',
-     '#options'     => $tax_options,
-     '#weight'      => 2,
-     '#default_value' => variable_get('tax_classes',''),
-   );
-   $form['taxonomy']['button'] = array(
-      '#type' => 'submit',
-      '#value' => t('Set/Reset Taxonomy for all feature nodes'),
-      '#weight' => 3,
-   );
-
-}
-/************************************************************************
- * Adds the 'Sync Features' fieldset to the feature admin form.
- *
- * The fieldset ($form['sync']) holds a description item, a second item
- * listing "genus species" for every organism returned by
- * tripal_organism_get_synced(), and a 'Sync all Features' submit button
- * (weight 3), the label tripal_feature_admin_validate() compares against
- * to queue the sync job.
- *
- * NOTE(review): the feature lookup at the top reads $node->title and
- * $node->organism_id, but $node is neither a parameter nor assigned in
- * this function, and the resulting $feature is never used afterwards.
- * The query looks like leftover code (its comment talks about organisms);
- * confirm and remove it from the live copy of this function.
- * NOTE(review): both description items use weight 1 -- confirm the
- * intended ordering.
- *
- * @param form the form array, modified in place (passed by reference)
- */
-function get_tripal_feature_admin_form_sync_set (&$form) {
-
-  
-   // get the list of organisms which will be synced.
-   $feature_sql = "SELECT * FROM {Feature} WHERE uniquename = '%s' and organism_id = %d";
-   $previous_db = tripal_db_set_active('chado');
-   $feature = db_fetch_object(db_query($feature_sql,$node->title,$node->organism_id));
-   tripal_db_set_active($previous_db);
-
-   // define the fieldsets
-   $form['sync'] = array(
-      '#type' => 'fieldset',
-      '#title' => t('Sync Features')
-   );
-
-   $form['sync']['description'] = array(
-      '#type' => 'item',
-      '#value' => t("Click the 'Sync all Features' button to create Drupal ".
-         "content for features in chado. Only features of the types listed ".
-         "above in the Feature Types box will be synced. Depending on the ".
-         "number of features in the chado database this may take a long ".
-         "time to complete. "),
-      '#weight' => 1,
-   );
-
-   $orgs = tripal_organism_get_synced();   
-   $org_list = '';
-   foreach($orgs as $org){
-      $org_list .= "$org->genus $org->species, ";
-   }
-   $form['sync']['description2'] = array(
-      '#type' => 'item',
-      '#value' => "Only features for the following organisms will be synced: ".
-         " $org_list",
-      '#weight' => 1,
-   );
-
-   $form['sync']['button'] = array(
-      '#type' => 'submit',
-      '#value' => t('Sync all Features'),
-      '#weight' => 3,
-   );
-
-}
 /************************************************************************
  * Display help and module information
  * @param path which path of the site we're displaying help
@@ -404,6 +119,14 @@ function tripal_feature_menu() {
      'access arguments' => array('administer site configuration'),
      'type' => MENU_NORMAL_ITEM,
    );
+   $items['admin/tripal/fasta_loader'] = array(
+     'title' => 'Import a multi-FASTA file',
+     'description' => 'Load sequences from a multi-FASTA file into Chado',
+     'page callback' => 'drupal_get_form',
+     'page arguments' => array('tripal_feature_fasta_load_form'),
+     'access arguments' => array('administer site configuration'),
+     'type' => MENU_NORMAL_ITEM,
+   );
 
    $items['admin/settings/tripal/tripal_feature/load'] = array(
      'title' => 'Bulk Load',
@@ -428,9 +151,12 @@ function chado_feature_insert($node){
    // If this feature already exists then don't recreate it in chado
    // TODO: the unique index in chado for this also includes the type_id. If the site
    // ever needs to have the same feature name for different types then this will break.
-   $feature_sql = "SELECT * FROM {Feature} WHERE uniquename = '%s' and organism_id = %d";
+   $feature_sql = "SELECT * 
+                   FROM {Feature} F
+                     INNER JOIN {cvterm} CVT ON F.type_id = CVT.cvterm_id
+                   WHERE uniquename = '%s' and organism_id = %d and CVT.name = '%s'";
    $previous_db = tripal_db_set_active('chado');
-   $feature = db_fetch_object(db_query($feature_sql,$node->title,$node->organism_id));
+   $feature = db_fetch_object(db_query($feature_sql,$node->uniquename,$node->organism_id,$node->feature_type));
    tripal_db_set_active($previous_db);
 
    // if the feature doesn't exist then let's create it in chado.
@@ -449,11 +175,11 @@ function chado_feature_insert($node){
 
       // use chado database
       $previous_db = tripal_db_set_active('chado');
-      db_query($sql,$node->organism_id,$node->title,$node->title,
+      db_query($sql,$node->organism_id,$node->name,$node->uniquename,
       $residues,strlen($residues),$obsolete,$node->feature_type);
 
       // now that we've added the feature, get the feature id for this feature
-      $feature = db_fetch_object(db_query($feature_sql,$node->title,$node->organism_id));
+      $feature = db_fetch_object(db_query($feature_sql,$node->uniquename,$node->organism_id,$node->feature_type));
 
       // now use drupal database
       tripal_db_set_active($previous_db);
@@ -539,7 +265,7 @@ function chado_feature_update($node){
          $obsolete = 'TRUE';
       }
       $previous_db = tripal_db_set_active('chado');  // use chado database
-      db_query($sql,$residues,$node->title,$node->title,
+      db_query($sql,$residues,$node->name,$node->uniquename,
       strlen($residues),$node->organism_id,$obsolete,$node->feature_type,
       $feature->feature_id);
       tripal_db_set_active($previous_db);  // now use drupal database
@@ -714,11 +440,31 @@ function chado_feature_form ($node,$param){
    );
 
    $form['title']= array(
+      '#type' => 'textfield',
+      '#title' => t('Title'),
+      '#required' => TRUE,
+      '#default_value' => $feature->featurename,
+      '#description' => t('The title must be a unique identifier for this feature.  It is recommended to use a combination of uniquename, organism and feature type in the title as this is guranteed to be unique.'),
+      '#weight' => 1,
+      '#maxlength' => 255
+   );
+
+   $form['uniquename']= array(
       '#type' => 'textfield',
       '#title' => t('Unique Feature Name'),
       '#required' => TRUE,
       '#default_value' => $feature->featurename,
-      '#description' => t('Enter a unique name for this feature'),
+      '#description' => t('Enter a unique name for this feature.  This name must be unique for the organism and feature type.'),
+      '#weight' => 1,
+      '#maxlength' => 255
+   );
+
+   $form['name']= array(
+      '#type' => 'textfield',
+      '#title' => t('Feature Name'),
+      '#required' => TRUE,
+      '#default_value' => $feature->featurename,
+      '#description' => t('Enter the name used by humans to refer to this feature.'),
       '#weight' => 1,
       '#maxlength' => 255
    );
@@ -815,25 +561,32 @@ function chado_feature_validate($node){
    // the organism doesn't already have this uniquename. We don't want to give
    // two sequences the same uniquename
    if($node->feature_id){
-      $sql = "SELECT * FROM {Feature} WHERE uniquename = '%s' ".
-             "  AND organism_id = %d AND NOT feature_id = %d";
+      $sql = "SELECT * 
+              FROM {Feature} F
+                INNER JOIN {cvterm} CVT ON F.type_id = CVT.cvterm_id
+              WHERE uniquename = '%s' 
+               AND organism_id = %d AND CVT.name = '%s' AND NOT feature_id = %d";
       $previous_db = tripal_db_set_active('chado');
-      $result = db_fetch_object(db_query($sql, $node->title,$node->organism_id,$node->feature_id));
+      $result = db_fetch_object(db_query($sql, $node->uniquename,$node->organism_id,$node->feature_type,$node->feature_id));
       tripal_db_set_active($previous_db);
       if($result){
-         form_set_error('title',t("Feature update cannot proceed. The feature name '$node->title' is not unique for this organism. Please provide a unique name for this feature. "));
+         form_set_error('uniquename',t("Feature update cannot proceed. The feature name '$node->uniquename' is not unique for this organism. Please provide a unique name for this feature. "));
       }
    }
 
    // if this is an insert then we just need to make sure this name doesn't
    // already exist for this organism if it does then we need to throw an error
    else {
-      $sql = "SELECT * FROM {Feature} WHERE uniquename = '%s' AND organism_id = %d";
+      $sql = "SELECT * 
+              FROM {Feature} F
+                INNER JOIN {cvterm} CVT ON F.type_id = CVT.cvterm_id
+              WHERE uniquename = '%s' 
+               AND organism_id = %d AND CVT.name = '%s'";
       $previous_db = tripal_db_set_active('chado');
-      $result = db_fetch_object(db_query($sql, $node->title,$node->organism_id));
+      $result = db_fetch_object(db_query($sql, $node->uniquename,$node->organism_id,$node->feature_type));
       tripal_db_set_active($previous_db);
       if($result){
-         form_set_error('title',t("Feature insert cannot proceed. The feature name '$node->title' already exists for this organism. Please provide a unique name for this feature. "));
+         form_set_error('uniquename',t("Feature insert cannot proceed. The feature name '$node->uniquename' already exists for this organism. Please provide a unique name for this feature. "));
       }
    }
 
@@ -843,7 +596,7 @@ function chado_feature_validate($node){
    if($node->residues){
       $residues = preg_replace("/[^\w]/",'',$node->residues);
       if(!preg_match("/^[ACTGURYMKSWBDHVN]+$/i",$residues)){
-         form_set_error('residues',t("The residues in feature $node->title contains more than the nucleotide IUPAC characters. Only the following characters are allowed: A,C,T,G,U,R,Y,M,K,S,W,B,D,H,V,N: '" . $residues ."'"));
+         form_set_error('residues',t("The residues in feature $node->name contains more than the nucleotide IUPAC characters. Only the following characters are allowed: A,C,T,G,U,R,Y,M,K,S,W,B,D,H,V,N: '" . $residues ."'"));
       }
    }
 
@@ -860,11 +613,10 @@ function chado_feature_validate($node){
  *  to add auxiliary data to the node object.
  */
 function chado_feature_load($node){
-   // get the feature_id for this node:
+   // add the feature_id for this node:
    $sql = 'SELECT feature_id FROM {chado_feature} WHERE vid = %d';
    $map = db_fetch_object(db_query($sql, $node->vid));
 
-   $previous_db = tripal_db_set_active('chado');  // use chado database
 
    // get information about this organism and add it to the items in this node
    $sql = "SELECT F.feature_id, F.name as featurename, F.uniquename, ".
@@ -874,17 +626,26 @@ function chado_feature_load($node){
           "  INNER JOIN Organism O ON F.organism_id = O.organism_id ".
           "  INNER JOIN CVterm CVT ON F.type_id = CVT.cvterm_id ".
           "WHERE F.feature_id = %d";
+   $previous_db = tripal_db_set_active('chado');  // use chado database
    $feature = db_fetch_object(db_query($sql,$map->feature_id));
+   tripal_db_set_active($previous_db);  // now use drupal database
    $additions->feature = $feature;
    $additions->seqlen = $feature->seqlen;
+   // add organism node nid
+   $sql = "SELECT nid FROM {chado_organism} WHERE organism_id = %d";
+   $org_nid = db_result(db_query($sql, $additions->feature->organism_id));
+   $additions->org_nid = $org_nid;
+   $additions->accession =  variable_get('chado_feature_accession_prefix','ID') . $feature->feature_id;
    
-   // get the feature synonyms
+   // add the feature synonyms
    $sql = "SELECT S.name ".
           "FROM {Feature_Synonym} FS ".
           "  INNER JOIN Synonym S ".
           "    ON FS.synonym_id = S.Synonym_id ".
           "WHERE FS.feature_id = %d";
+   $previous_db = tripal_db_set_active('chado');  // use chado database
    $results = db_query($sql,$map->feature_id);
+   tripal_db_set_active($previous_db);  // now use drupal database
    $synonyms = array();
    $i=0;
    while($synonym = db_fetch_object($results)){
@@ -892,7 +653,7 @@ function chado_feature_load($node){
    }
    $additions->synonyms = $synonyms;
 
-   // get feature references in external databases
+   // add feature references in external databases
    $sql = "SELECT F.uniquename,F.Feature_id,DBX.accession,DB.description as dbdesc, ".
           "   DB.db_id, DB.name as db_name, DB.urlprefix ".
           "FROM {Feature} F ".
@@ -900,23 +661,81 @@ function chado_feature_load($node){
           "  INNER JOIN Dbxref DBX on DBX.dbxref_id = FDBX.dbxref_id ".
           "  INNER JOIN DB on DB.db_id = DBX.db_id ".
           "WHERE F.feature_id = %d";
+   $previous_db = tripal_db_set_active('chado');  // use chado database
    $results = db_query($sql,$map->feature_id);
+   tripal_db_set_active($previous_db);  // now use drupal database
    $references = array();
    $i=0;
    while($accession = db_fetch_object($results)){
       $references[$i++] = $accession;
-      // we want to specifically pull out the genbank id
-      if(preg_match("/Genbank_est/",$accession->db_name)){
-         $additions->gbaccession = $accession;
-      }
    }
    $additions->references = $references;
+
+
+   // add the feature relationships
+   $sql = "SELECT 
+             FS.name as subject_name, 
+             FS.uniquename as subject_uniquename, 
+             CVTS.name as subject_type,
+             FS.residues as subject_residues,
+             FR.subject_id,          
+             FR.type_id,
+             CVT.name as rel_type,     
+             FO.name as object_name, 
+             FO.uniquename as object_uniquename, 
+             CVTO.name as object_type,
+             FO.residues as object_residues,
+             FR.object_id
+           FROM {feature_relationship} FR
+             INNER JOIN {cvterm} CVT ON FR.type_id = CVT.cvterm_id
+             INNER JOIN {feature} FS ON FS.feature_id = FR.subject_id
+             INNER JOIN {feature} FO ON FO.feature_id = FR.object_id
+             INNER JOIN {cvterm} CVTO ON FO.type_id = CVTO.cvterm_id
+             INNER JOIN {cvterm} CVTS ON FS.type_id = CVTS.cvterm_id ";
+   $osql = "$sql  WHERE FR.object_id = %d";
+   $ssql = "$sql  WHERE FR.subject_id = %d";
+
+   // first get the relationships where this feature is the object
+   // then where it is the subject 
+   $previous_db = tripal_db_set_active('chado');  // use chado database
+   $oresults = db_query($osql, $map->feature_id);
+   $sresults = db_query($ssql, $map->feature_id);
    tripal_db_set_active($previous_db);  // now use drupal database
 
-   // get organism node nid
-   $sql = "SELECT nid FROM {chado_organism} WHERE organism_id = %d";
-   $org_nid = db_result(db_query($sql, $additions->feature->organism_id));
-   $additions->org_nid = $org_nid;
+   // identify the drupal node for each feature and add that to the 
+   // relationship records
+   $srelationships = array();
+   $orelationships = array();
+   $i=0;
+   $nodesql = "SELECT nid FROM {chado_feature} WHERE feature_id = %d";
+   while($rel = db_fetch_object($oresults)){
+     $node = db_fetch_object(db_query($nodesql,$rel->subject_id));
+     if($node){
+        $rel->subject_nid = $node->nid;
+     }  
+     $node = db_fetch_object(db_query($nodesql,$rel->object_id));
+     if($node){
+        $rel->object_nid = $node->nid;
+     }  
+     $orelationships[$i++] = $rel;      
+   }
+   $i=0;
+   while($rel = db_fetch_object($sresults)){
+     $node = db_fetch_object(db_query($nodesql,$rel->subject_id));
+     if($node){
+        $rel->subject_nid = $node->nid;
+     }  
+     $node = db_fetch_object(db_query($nodesql,$rel->object_id));
+     if($node){
+        $rel->object_nid = $node->nid;
+     }  
+     $srelationships[$i++] = $rel;      
+   }
+   $additions->orelationships = $orelationships;
+   $additions->srelationships = $srelationships;
+   // add the feature properties
+   
+
    return $additions;
 }
 
@@ -1486,7 +1305,7 @@ function tripal_feature_set_taxonomy ($node,$feature_id){
    // now add the taxonomy to the node
    $terms['tags'] = $tags;
    taxonomy_node_save($node,$terms);
-   //   print "Setting $node->title: " . implode(", ",$tags) . "\n";
+   //   print "Setting $node->name: " . implode(", ",$tags) . "\n";
 
    // get the analysis that this feature may belong to and add it as taxonomy
    // We'll add each one individually since there may be more than one analysis