Selaa lähdekoodia

The GAF loader did not associate the GO terms with the analysis which prevented the GO module from making reports for it

spficklin 13 vuotta sitten
vanhempi
commit
1e57b551c2

+ 107 - 44
tripal_analysis_go/gaf_loader.inc

@@ -27,12 +27,6 @@ function tripal_analysis_go_gaf_load_form (){
    while($organism = db_fetch_object($org_rset)){
       $organisms[$organism->organism_id] = "$organism->genus $organism->species ($organism->common_name)";
    }
-   $form['type']= array(
-      '#type' => 'textfield',
-      '#title' => t('Sequence Type'),
-      '#required' => TRUE,
-      '#description' => t('Please enter the Sequence Ontology term that describes the sequences in the GAF file.'),
-   );
    $form['organism_id'] = array (
      '#title'       => t('Organism'),
      '#type'        => t('select'),
@@ -40,6 +34,20 @@ function tripal_analysis_go_gaf_load_form (){
      '#required'    => TRUE,
      '#options'     => $organisms,
    );
+   $form['seq_type']= array(
+      '#type' => 'textfield',
+      '#title' => t('Sequence Type'),
+      '#required' => TRUE,
+      '#description' => t('Please enter the Sequence Ontology term that describes the sequences in the GAF file.'),
+   );
+	$form['query_uniquename'] = array(
+      '#title' => t('Use Unique Name'),
+      '#type' => 'checkbox',
+      '#description' => t('Select this checboxk if the feature name in the GAF file '.
+        'matches the uniquename in the database.  By default, the feature will '.
+        'be mapped to the "name" of the feature.'),
+      '#default_value' => $query_uniquename,
+	);
    $form['import_options'] = array(
       '#type' => 'fieldset',
       '#title' => t('Import Options'),
@@ -107,15 +115,12 @@ function tripal_analysis_go_gaf_load_form (){
       '#type' => 'textfield',
       '#title' => t('Regular expression for the name'),
       '#required' => FALSE,
-      '#description' => t('Enter the regular expression that will extract the feature name. For example, for text with a name and uniquename separated by a bar \'|\' (>seqname|uniquename), the regular expression would be, "^(.*?)\|.*$".  the name must be unique for this organism and sequence type.' ),
-   );   
-
-   $form['advanced']['re_uname']= array(
-      '#type' => 'textfield',
-      '#title' => t('Regular expression for the unique name'),
-      '#required' => FALSE,
-      '#description' => t('Enter the regular expression that will extract the unique feature name.  For example, for text with a name and uniquename separated by a bar \'|\' (>seqname|uniquename), the regular expression would be, "^.*?\|(.*)$".  the name must be unique for this organism and sequence type.' ),
+      '#description' => t('Enter the regular expression that will extract the '.
+         'feature name from the GAF file. This option is '.
+         'is only required when the feature identifier does not identically match a feature '.
+         'in the database.'),
    );   
+ 
 
    $form['button'] = array(
       '#type' => 'submit',
@@ -136,9 +141,9 @@ function tripal_analysis_go_gaf_load_form_validate ($form, &$form_state){
    $remove   = $form_state['values']['remove'];
    $replace  = $form_state['values']['replace'];
    $analysis_id = $form_state['values']['analysis_id'];
-   $type      = trim($form_state['values']['type']);
+   $type      = trim($form_state['values']['seq_type']);
    $re_name      = trim($form_state['values']['re_name']);
-   $re_uname     = trim($form_state['values']['re_uname']);
+   $query_uniquename = $form_state['values']['query_uniquename'];
 
 
    // check to see if the file is located local to Drupal
@@ -158,10 +163,6 @@ function tripal_analysis_go_gaf_load_form_validate ($form, &$form_state){
        form_set_error('add_only',t("Please select only one checkbox from the import options section"));
    }
 
-   if($re_name and $re_uname){
-     form_set_error('re_name',t("Please provide a regular expression for the name or the unique name only, not both."));
-   }
-
    // check to make sure the types exists
    $cvtermsql = "SELECT CVT.cvterm_id
                  FROM {cvterm} CVT
@@ -193,12 +194,12 @@ function tripal_analysis_go_gaf_load_form_submit ($form, &$form_state){
    $remove   = $form_state['values']['remove'];
    $replace  = $form_state['values']['replace'];
    $analysis_id = $form_state['values']['analysis_id'];
-   $type      = trim($form_state['values']['type']);
+   $type      = trim($form_state['values']['seq_type']);
    $re_name      = trim($form_state['values']['re_name']);
-   $re_uname     = trim($form_state['values']['re_uname']);
-
+   $query_uniquename = $form_state['values']['query_uniquename'];
 
-   $args = array($gaf_file,$organism_id,$analysis_id,$add_only,$replace,$remove,$re_name,$re_uname,$type);
+   $args = array($gaf_file,$organism_id,$analysis_id,$add_only,$replace,
+      $remove,$re_name,$type,$query_uniquename);
    if($add_only){
      $type = 'add GO terms';
    }
@@ -220,7 +221,8 @@ function tripal_analysis_go_gaf_load_form_submit ($form, &$form_state){
  * @ingroup gff3_loader
  */
 function tripal_analysis_go_load_gaf($gaf_file, $organism_id,$analysis_id,$add_only =0, 
-   $replace = 0, $remove = 0, $re_name, $re_uname, $type, $job = NULL)
+   $replace = 0, $remove = 0, $re_name, $type, $query_uniquename,
+   $job = NULL)
 {
    print "Opening GAF file $gaf_file\n";
 
@@ -275,8 +277,7 @@ function tripal_analysis_go_load_gaf($gaf_file, $organism_id,$analysis_id,$add_o
       $product   = $cols[16];
 
       // get the name or uniquename for the feature
-      $uname = $object;
-      $name = '';
+      $name = $object;
       if($re_name){
          if(!preg_match("/$re_name/",$object,$matches)){
             print "Regular expression for the feature name finds nothing\n";
@@ -284,16 +285,11 @@ function tripal_analysis_go_load_gaf($gaf_file, $organism_id,$analysis_id,$add_o
             $name = trim($matches[1]);
          }
       } else {
-         preg_match("/^\s*(.*?)[\s\|].*$/",$object,$matches);
-         $name = trim($matches[1]);
-      }
-      if($re_uname){
-         if(!preg_match("/$re_uname/",$object,$matches)){
-            print "Regular expression for the feature unique name finds nothing\n";
-         } else {
-            $uname = trim($matches[1]);
+         if(preg_match("/^\s*(.*?)[\s\|].*$/",$object,$matches)){
+            $name = trim($matches[1]);
          }
       }
+      
 
       // get the feature
       $values = array(
@@ -305,23 +301,25 @@ function tripal_analysis_go_load_gaf($gaf_file, $organism_id,$analysis_id,$add_o
          ),
          'organism_id' => $organism_id,
       );
-      if($name){
+      if(!$query_uniquename){
         $values['name'] = $name;
-      } 
-      if($uname){
-        $values['uniquename'] = $uname;
+      } else {
+        $values['uniquename'] = $name;
       } 
       $feature = tripal_core_chado_select('feature',array('*'),$values);
-
-      // add the GO term to the feature
-      tripal_analysis_go_load_gaff_go_term($feature[0],$go_id,$remove);
+      if(count($feature) == 0){
+         print "WARNING: Cannot find the feature: '$name'\n";         
+      } else {
+         // add the GO term to the feature
+         tripal_analysis_go_load_gaff_go_term($feature[0],$go_id,$remove,$analysis_id);
+      }
    }
    return 1;
 }
 /**
 *
 */
-function tripal_analysis_go_load_gaff_go_term($feature,$dbxref,$remove){
+function tripal_analysis_go_load_gaff_go_term($feature,$dbxref,$remove,$analysis_id){
 
    // get the database name from the reference.  If it doesn't exist then create one.
    $ref = explode(":",$dbxref);
@@ -381,7 +379,7 @@ function tripal_analysis_go_load_gaff_go_term($feature,$dbxref,$remove){
       if($ret){
          print "   Added ontology term $dbname:$accession to feature $feature->uniquename\n";
       } else {
-         print "ERROR: failed to insert ontology term: $dbname:$accession\n";
+         print "ERROR: failed to insert ontology term '$dbname:$accession' for feature: $feature\n";
          return 0;
       }
    } else {
@@ -399,5 +397,70 @@ function tripal_analysis_go_load_gaff_go_term($feature,$dbxref,$remove){
       }
    }
 
+   if(!$remove){
+      print "   Associating feature $feature->name to analysis\n";
+      // Insert into analysisfeature table only if it doesn't already exist
+      $values = array('feature_id' => $feature->feature_id, 'analysis_id' => $analysis_id);
+      $analysisfeature = tripal_core_chado_select('analysisfeature',array('*'),$values);
+      if(sizeof($analysisfeature) == 0){
+         $analysisfeature = tripal_core_chado_insert('analysisfeature',$values);
+         $analysisfeature_id = $analysisfeature['analysisfeature_id'];
+      } else {
+         $analysisfeature_id = $analysisfeature[0]->analysisfeature_id;
+      }
+
+      // Insert GO terms into analysisfeatureprop table
+      $values = array('analysisfeature_id' => $analysisfeature_id,
+                      'type_id' => $cvterm->cvterm_id,
+                      'rank' => 0);
+      $analysisfeatureprop = tripal_core_chado_select('analysisfeatureprop',array('*'),$values);
+      if(sizeof($analysisfeatureprop) == 0){
+         $values['value'] = $matches[1];
+         $analysisfeatureprop = tripal_core_chado_insert('analysisfeatureprop',$values);
+      }
+   }
    return 1;
 }
+/**
+ * Adds a property to the analysisfeature record that links a feature to an
+ * analysis, creating the analysisfeature record first when it is missing.
+ *
+ * The property is stored with a rank one higher than the highest rank
+ * already present for the same analysisfeature/type pair, or with rank 0
+ * when no such property exists yet.
+ *
+ * NOTE(review): this function does not check whether an identical value is
+ * already stored; every call inserts a new analysisfeatureprop record.
+ *
+ * @param $feature_id
+ *   Used as the feature_id of the analysisfeature record.
+ * @param $analysis_id
+ *   Used as the analysis_id of the analysisfeature record.
+ * @param $brite_id
+ *   Used as the type_id of the analysisfeatureprop record.
+ * @param $keggterm
+ *   Used as the value of the analysisfeatureprop record.
+ *
+ * @return
+ *   The return value of tripal_core_chado_insert() for the new
+ *   analysisfeatureprop record, or 0 when no analysisfeature record could be
+ *   found or created.
+ */
+function tripal_analysis_go_load_gaff_insert_analysisfeatureprop ($feature_id, $analysis_id,
+   $brite_id,$keggterm)
+{
+
+   // add the analysisfeature record if it doesn't already exist.
+   $values = array('feature_id' => $feature_id,'analysis_id' => $analysis_id);
+   $analysisfeature_arr = tripal_core_chado_select('analysisfeature',
+      array('analysisfeature_id'),$values);
+   if(count($analysisfeature_arr) == 0){
+      tripal_core_chado_insert('analysisfeature',$values);
+      $analysisfeature_arr = tripal_core_chado_select('analysisfeature',
+         array('analysisfeature_id'),$values);
+   }
+
+   // Without an analysisfeature record there is nothing to attach the
+   // property to (e.g. the insert above failed).
+   if(empty($analysisfeature_arr)){
+      return 0;
+   }
+   $analysisfeature_id = $analysisfeature_arr[0]->analysisfeature_id;
+   if(!$analysisfeature_id){
+      return 0;
+   }
+
+   // Get the highest rank for this analysisfeature_id and type_id in the
+   // analysisfeatureprop table.  Only the first (highest) row is fetched.
+   $sql = "SELECT rank FROM analysisfeatureprop WHERE analysisfeature_id = %d and type_id = %d ORDER BY rank DESC";
+   $previous_db = tripal_db_set_active('chado');
+   $result = db_fetch_object(db_query($sql,$analysisfeature_id,$brite_id));
+   // Restore the database that was active before the query.
+   tripal_db_set_active($previous_db);
+
+   // Any existing property (including one at rank 0) pushes the new
+   // property to the next rank so the two records do not share a rank.
+   $rank = 0;
+   if ($result) {
+      $rank = $result->rank + 1;
+   }
+
+   $values = array(
+      'analysisfeature_id' => $analysisfeature_id, 
+      'type_id' => $brite_id,
+      'value' => $keggterm,
+      'rank' => $rank,
+   );
+
+   return tripal_core_chado_insert('analysisfeatureprop',$values);
+}

+ 11 - 1
tripal_analysis_go/theme/tripal_organism/tripal_organism_go_summary.tpl.php

@@ -9,7 +9,17 @@
      if($has_results){
         print $form;
      } else {
-       ?><div class="tripal-no-results">There are no GO reports avaialble</div><?php
+       ?><div class="tripal-no-results">
+           There are no GO reports available
+           <?php if(user_access('access administration pages')){ ?>
+              <p><br>Administrators, to view a GO report you must:
+              <ul>
+                 <li>load GO assignments and associate them to features and a corresponding analysis</li>
+                 <li>Set the <a href="<?php print url('admin/tripal/tripal_cv/cvtermpath');?>">cvtermpath</a> for the 'biological process', 'molecular_function' and 'cellular_component' vocabularies.</li>
+                 <li>Populate the <a href="<?php print url('admin/tripal/tripal_mviews');?>">go_count_analysis</a> materialized view</li>
+              </ul> </p>
+           <?php }?>
+         </div><?php
      }
   ?>
   <div id="tripal_analysis_go_org_charts"></div>    

+ 6 - 2
tripal_analysis_go/tripal_analysis_go.module

@@ -566,7 +566,12 @@ function tripal_analysis_go_job_describe_args($callback,$args){
 
       $organism = tripal_core_chado_select('organism',array('genus','species'),array('organism_id' => $args[1]));
       $new_args['Organism'] = $organism[0]->genus." ". $organism[0]->species;
-      $new_args['Sequence Type'] = $args[8];
+      $new_args['Sequence Type'] = $args[7];
+      if(!$args[8]){
+        $new_args['Use Unique Name'] = 'No';
+      } else {
+        $new_args['Use Unique Name'] = 'Yes';
+      }
 
       // add in the analysis 
       if($args[2]){
@@ -585,7 +590,6 @@ function tripal_analysis_go_job_describe_args($callback,$args){
       }
 
       $new_args['Regular expression for the feature name'] = $args[6];
-      $new_args['Regular expression for the feature unique name'] = $args[7];
 
 
    }

+ 1 - 1
tripal_analysis_interpro/parseInterpro.inc

@@ -442,7 +442,7 @@ function tripal_analysis_interpro_parseSingleXMLFile ($analysis_id, $interproxml
             $analysisfeature = tripal_core_chado_insert('analysisfeature',$values);
             $analysisfeature_id = $analysisfeature['analysisfeature_id'];
          } else {
-         $analysisfeature_id = $analysisfeature[0]->analysisfeature_id;
+            $analysisfeature_id = $analysisfeature[0]->analysisfeature_id;
          }
 
          // Insert interpro xml results into analysisfeatureprop table