Jelajahi Sumber

Add analysis requirement to GFF loader

spficklin 14 tahun lalu
induk
melakukan
c616bca1ea

+ 2 - 2
tripal_analysis/tripal_analysis.admin.inc

@@ -48,8 +48,8 @@ function tripal_analysis_admin() {
 	if(!$active_jobs){
 		// add the field set for syncing analyses
 		get_tripal_analysis_admin_form_sync_set($form);
-		get_tripal_analysis_admin_form_reindex_set($form);
-		get_tripal_analysis_admin_form_taxonomy_set($form);
+//		get_tripal_analysis_admin_form_reindex_set($form);
+//		get_tripal_analysis_admin_form_taxonomy_set($form);
 		get_tripal_analysis_admin_form_cleanup_set($form);
 	} else {
 		$form['notice'] = array(

+ 2 - 1
tripal_analysis/tripal_analysis.info

@@ -6,4 +6,5 @@ project = tripal_analysis
 package = Tripal
 version = 6.x-0.2b-m0.2
 dependencies[] = tripal_core
-dependencies[] = tripal_feature
+dependencies[] = tripal_db
+dependencies[] = tripal_cv

+ 30 - 16
tripal_analysis/tripal_analysis.module

@@ -1,6 +1,5 @@
 <?php
 // $Id:
-// Copyright 2009 Clemson University
 /*******************************************************************************
  * Note: When we pull information for an analysis from chado database. We use
  * 'analysisname' instead of just 'name' to avoid name collision with drupal's
@@ -28,12 +27,9 @@ function tripal_analysis_unregister_child($modulename){
    }
 }
 /******************************************************************************
- * Tripal Analysis lets users display/hide analysis results associated
- * with a tripal feature. Load javascript when module initialized
  */
 function tripal_analysis_init(){
-   drupal_add_js(drupal_get_path('theme', 'tripal').
-                                 '/js/tripal_analysis.js');
+   drupal_add_js(drupal_get_path('theme', 'tripal').'/js/tripal_analysis.js');
 }
 /*******************************************************************************
  * tripal_analysis_menu()
@@ -283,6 +279,8 @@ function chado_analysis_form ($node){
       '#title' => t('Analysis Name'),
       '#required' => FALSE,
       '#default_value' => $node->analysisname,
+      '#description' => t("This should be a handy short identifier that 
+         describes the analysis succintly as possible which helps the user find analyses."),
       '#weight' => 1
 	);
 	$form['program']= array(
@@ -290,6 +288,7 @@ function chado_analysis_form ($node){
       '#title' => t('Program'),
       '#required' => TRUE,
       '#default_value' => $node->program,
+      '#description' => t("Program name, e.g. blastx, blastp, sim4, genscan."),
       '#weight' => 2
 	);
 	$form['programversion']= array(
@@ -297,6 +296,7 @@ function chado_analysis_form ($node){
       '#title' => t('Program Version'),
       '#required' => TRUE,
       '#default_value' => $node->programversion,
+      '#description' => t("Version description, e.g. TBLASTX 2.0MP-WashU [09-Nov-2000]"),
       '#weight' => 3
 	);
 	$form['algorithm']= array(
@@ -304,13 +304,17 @@ function chado_analysis_form ($node){
       '#title' => t('Algorithm'),
       '#required' => FALSE,
       '#default_value' => $node->algorithm,
+      '#description' => t("Algorithm name, e.g. blast."),
       '#weight' => 4
 	);
 	$form['sourcename']= array(
       '#type' => 'textfield',
       '#title' => t('Source Name'),
-      '#required' => FALSE,
+      '#required' => TRUE,
       '#default_value' => $node->sourcename,
+      '#description' => t('The name of the source data.  This could be a file name, data set name or a 
+           small description for how the data was collected.  For long descriptions use the description field below'),
+
       '#weight' => 5
 	);
 	$form['sourceversion']= array(
@@ -318,6 +322,7 @@ function chado_analysis_form ($node){
       '#title' => t('Source Version'),
       '#required' => FALSE,
       '#default_value' => $node->sourceversion,
+      '#description' => t('If the source dataset has a version, include it here'),
       '#weight' => 6
 	);
 	$form['sourceuri']= array(
@@ -325,6 +330,9 @@ function chado_analysis_form ($node){
       '#title' => t('Source URI'),
       '#required' => FALSE,
       '#default_value' => $node->sourceuri,
+      '#description' => t("This is a permanent URL or URI for the source of the analysis. 
+         Someone could recreate the analysis directly by going to this URI and 
+         fetching the source data (e.g. the blast database, or the training model)."),
       '#weight' => 7
 	);
 	// Get time saved in chado
@@ -356,6 +364,9 @@ function chado_analysis_form ($node){
       '#title' => t('Description and/or Program Settings'),
       '#required' => FALSE,
       '#default_value' => check_plain($node->description),
+      '#description' => t('Please provide all necessary information to allow
+         someone to recreate the analysis, including materials and methods
+         for collection of the source data and performing the analysis'),
       '#weight' => 9
 	);
 	
@@ -380,11 +391,12 @@ function chado_analysis_load($node){
              "WHERE Analysis_id = $ana_node->analysis_id";
 		$previous_db = tripal_db_set_active('chado');  // use chado database
 		$additions = db_fetch_object(db_query($sql));
-        // get number of features assc with this analysis
-        $sql = "SELECT count(feature_id) as featurecount ".
-             "FROM {Analysisfeature} ".
-             "WHERE Analysis_id = %d";
-        $additions->featurecount = db_result(db_query($sql, $ana_node->analysis_id));
+
+      // get number of features assc with this analysis
+//     $sql = "SELECT count(feature_id) as featurecount ".
+//            "FROM {Analysisfeature} ".
+//            "WHERE Analysis_id = %d";
+//      $additions->featurecount = db_result(db_query($sql, $ana_node->analysis_id));
 
 		tripal_db_set_active($previous_db);  // now use drupal database
 	}
@@ -613,7 +625,7 @@ function tripal_analysis_theme () {
 	return array(
       'tripal_analysis_analysis_page' => array (
          'arguments' => array('analyses'),
-	),
+	    ),
 	);
 }
 
@@ -771,11 +783,12 @@ function tripal_analyses_cleanup($dummy = NULL, $job_id = NULL) {
 /*******************************************************************************
  *
  */
+/*
 function tripal_analysis_reindex_features ($analysis_id = NULL, $job_id = NULL){
 	$i = 0;
 
 	// if the caller provided a analysis_id then get all of the features
-	// associated with the analysis. Otherwise get all sequences assoicated
+	// associated with the analysis. Otherwise get all sequences associated
 	// with all libraries.
 	if(!$analysis_id){
 		$sql = "SELECT Analysis_id, Feature_id ".
@@ -811,10 +824,11 @@ function tripal_analysis_reindex_features ($analysis_id = NULL, $job_id = NULL){
 		tripal_feature_sync_feature ($feature_id);
 		$i++;
 	}
-}
+} */
 /*******************************************************************************
  *
  */
+/*
 function tripal_analysis_taxonify_features ($analysis_id = NULL, $job_id = NULL){
 	$i = 0;
 
@@ -866,7 +880,7 @@ function tripal_analysis_taxonify_features ($analysis_id = NULL, $job_id = NULL)
 		$i++;
 	}
 }
-
+*/
 /*************************************************************************
  * Implements hook_views_api()
  * Purpose: Essentially this hook tells drupal that there is views support for
@@ -877,4 +891,4 @@ function tripal_analysis_views_api() {
    return array(
       'api' => 2.0,
    );
-}
+}

+ 1 - 1
tripal_analysis_blast/tripal_analysis_blast.install

@@ -14,7 +14,7 @@ function tripal_analysis_blast_install() {
    // information to the chado Analysis table. Also in tripal_analysis_blast.module,
    // we need to define HOOK_get_settings() for the module to work properly.
    
-   // Inert into drupal's {tripal_analysis}
+   // Register the analysis type
    tripal_analysis_register_child('tripal_analysis_blast');
    
    // Add cvterm 'analysis_blast_output_iteration_hits' for inserting into featureprop table

+ 6 - 80
tripal_analysis_blast/tripal_analysis_blast.module

@@ -790,86 +790,12 @@ function chado_analysis_blast_form ($node){
       '#type' => 'hidden',
       '#default_value' => $node->title,
 	);
-	$form['analysisname']= array(
-      '#type' => 'textfield',
-      '#title' => t('Analysis Name'),
-      '#required' => FALSE,
-      '#default_value' => $node->analysisname,
-      '#weight' => 1
-	);
-	$form['program']= array(
-      '#type' => 'textfield',
-      '#title' => t('Program'),
-      '#required' => TRUE,
-      '#default_value' => $node->program,
-      '#weight' => 2
-	);
-	$form['programversion']= array(
-      '#type' => 'textfield',
-      '#title' => t('Program Version'),
-      '#required' => TRUE,
-      '#default_value' => $node->programversion,
-      '#weight' => 3
-	);
-	$form['algorithm']= array(
-      '#type' => 'textfield',
-      '#title' => t('Algorithm'),
-      '#required' => FALSE,
-      '#default_value' => $node->algorithm,
-      '#weight' => 4
-	);
-	$form['sourcename']= array(
-      '#type' => 'textfield',
-      '#title' => t('Source Name'),
-      '#required' => FALSE,
-      '#default_value' => $node->sourcename,
-      '#weight' => 5
-	);
-	$form['sourceversion']= array(
-      '#type' => 'textfield',
-      '#title' => t('Source Version'),
-      '#required' => FALSE,
-      '#default_value' => $node->sourceversion,
-      '#weight' => 6
-	);
-	$form['sourceuri']= array(
-      '#type' => 'textfield',
-      '#title' => t('Source URI'),
-      '#required' => FALSE,
-      '#default_value' => $node->sourceuri,
-      '#weight' => 7
-	);
-	// Get time saved in chado
-	$default_time = $node->timeexecuted;
-	$year = preg_replace("/^(\d+)-\d+-\d+ .*/", "$1", $default_time);
-	$month = preg_replace("/^\d+-0?(\d+)-\d+ .*/", "$1", $default_time);
-	$day = preg_replace("/^\d+-\d+-0?(\d+) .*/", "$1", $default_time);
-	// If the time is not set, use current time
-	if (!$default_time) {
-		$default_time = time();
-		$year = format_date($default_time, 'custom', 'Y');
-		$month = format_date($default_time, 'custom', 'n');
-		$day = format_date($default_time, 'custom', 'j');
-	}
-	$form['timeexecuted']= array(
-      '#type' => 'date',
-      '#title' => t('Time Executed'),
-      '#required' => TRUE,
-      '#default_value' => array(
-         'year' => $year,
-         'month' => $month,
-         'day' => $day,
-	),
-      '#weight' => 8
-	);
-	$form['description']= array(
-      '#type' => 'textarea',
-      '#rows' => 15,
-      '#title' => t('Description and/or Program Settings'),
-      '#required' => FALSE,
-      '#default_value' => check_plain($node->description),
-      '#weight' => 9
-	);
+	
+   // add in the shared analysis fields (keyed by element name, pre-filled from this node)
+   $analysis_form_elements = chado_analysis_form($node);
+   foreach ($analysis_form_elements as $key => $element){
+      $form[$key] = $element;  // merge per key; assigning to $form itself would discard the form
+   }
 
 	// Blast specific settings
 	if (preg_match("/.*\|.*\|.*/",$node->blastdb)) {

+ 12 - 4
tripal_core/tripal_core.api.inc

@@ -335,10 +335,14 @@ function tripal_core_chado_update($table,$match,$values){
 *  An array of column names
 * @param $values
 *  An associative array containing the values for filtering the results.
+* @param $has_record
+*  Set this argument to 'true' to have this function return a numeric 
+*  value for the number of records rather than the array of records.  This
+*  can be useful in 'if' statements to check the presence of particular records.
 * 
 * @return
-*  A database query result resource, or FALSE if the query was not executed 
-*  correctly. 
+*  An array of matching records, FALSE if the query was not executed 
+*  correctly, or the number of records in the dataset if $has_record is set.
 *
 * Example usage:
 * @code
@@ -366,7 +370,7 @@ function tripal_core_chado_update($table,$match,$values){
 * species.  The cvterm is also specified using its foreign key and the cv_id 
 * for the cvterm is nested as well.
 */
-function tripal_core_chado_select($table,$columns,$values){
+function tripal_core_chado_select($table,$columns,$values,$has_record = 0){
 
    // get the table description
    $table_desc = module_invoke_all('chado_'.$table.'_schema');
@@ -431,7 +435,11 @@ function tripal_core_chado_select($table,$columns,$values){
      $results[] = $r;    
    }
    
-   return $results;
+   if(!$has_record){
+      return $results;         // default: hand back the array of records
+   } else{
+      return count($results);  // PHP has no scalar(); count() yields the number of records
+   }
 }
 /**
 * Gets the value of a foreign key relationship

+ 55 - 5
tripal_feature/gff_loader.php

@@ -77,6 +77,44 @@ function tripal_core_gff3_load_form (){
                            will be removed rather than imported'),
       '#weight' => 5
    );
+
+   $form['analysis'] = array(
+      '#type' => 'fieldset',
+      '#title' => t('Analysis Used to Derive Features'),
+      '#weight'=> 6,
+      '#collapsed' => TRUE
+   ); 
+   $form['analysis']['desc'] = array(
+      '#type' => 'markup',
+      '#value' => t("Why specify an analysis for a data load?  All data comes 
+         from some place, even if downloaded from Genbank. By specifying
+         analysis details for all data uploads, it allows an end user to reproduce the
+         data set.  In some cases some of the fields may not apply.  For 
+         data downloaded from Genbank, the 'program' field does not apply but is
+         required.  In this case, simply provide the name of the data source 
+         (e.g. NCBI Genbank) for the program and use the date accessed for the
+         program version. For the description, be sure to include the search
+         criteria used for selecting data from GenBank."), 
+   );
+
+   // get the list of analyses
+   $sql = "SELECT * FROM {analysis} ORDER BY name";
+   $previous_db = tripal_db_set_active('chado');  // use chado database
+   $org_rset = db_query($sql);
+   tripal_db_set_active($previous_db);  // now use drupal database
+   $analyses = array();
+   $analyses[''] = '';
+   while($analysis = db_fetch_object($org_rset)){
+      $analyses[$analysis->analysis_id] = "$analysis->name ($analysis->program $analysis->programversion, $analysis->sourcename)";
+   }
+   $form['analysis']['analysis_id'] = array (
+     '#title'       => t('Analysis'),
+     '#type'        => 'select',  // element types are machine names and must not pass through t()
+     '#description' => t("Choose the analysis to which these features are associated "),
+     '#required'    => TRUE,
+     '#options'     => $analyses,
+   );
+
    $form['button'] = array(
       '#type' => 'submit',
       '#value' => t('Import GFF3 file'),
@@ -128,8 +166,9 @@ function tripal_core_gff3_load_form_submit ($form, &$form_state){
    $update   = $form_state['values']['update'];
    $refresh  = $form_state['values']['refresh'];
    $remove   = $form_state['values']['remove'];
+   $analysis_id = $form_state['values']['analysis_id'];  // must be named $analysis_id: that is what $args passes to the job
 
-   $args = array($gff_file,$organism_id,$add_only,$update,$refresh,$remove);
+   $args = array($gff_file,$organism_id,$analysis_id,$add_only,$update,$refresh,$remove);
    $type = '';
    if($add_only){
      $type = 'import only new features';
@@ -143,7 +182,7 @@ function tripal_core_gff3_load_form_submit ($form, &$form_state){
    if($remove){
      $type = 'delete features';
    }
-   tripal_add_job("$type GFF3 file $dfile",'tripal_core',
+   tripal_add_job("$type GFF3 file $gff_file",'tripal_core',
       'tripal_core_load_gff3',$args,$user->uid);
 
    return '';
@@ -151,7 +190,7 @@ function tripal_core_gff3_load_form_submit ($form, &$form_state){
 /*************************************************************************
 *
 */
-function tripal_core_load_gff3($gff_file, $organism_id,$add_only =0, 
+function tripal_core_load_gff3($gff_file, $organism_id,$analysis_id,$add_only =0, 
    $update = 0, $refresh = 0, $remove = 0, $job = NULL)
 {
 
@@ -365,7 +404,7 @@ function tripal_core_load_gff3($gff_file, $organism_id,$add_only =0,
     
 
          // add/update the feature
-         $feature = tripal_core_load_gff3_feature($organism,$cvterm,
+         $feature = tripal_core_load_gff3_feature($organism,$analysis_id,$cvterm,
             $attr_uniquename,$attr_name,$residues,$attr_is_analysis,
             $attr_is_obsolete, $add_only,$generate_name);
 
@@ -624,7 +663,7 @@ function tripal_core_load_gff3_alias($feature,$aliases){
 /*************************************************************************
 *
 */
-function tripal_core_load_gff3_feature($organism,$cvterm,$uniquename,$name,
+function tripal_core_load_gff3_feature($organism,$analysis_id,$cvterm,$uniquename,$name,
    $residues,$is_analysis='f',$is_obsolete='f',$add_only,$generate_name)  {
 
    if($generate_name){
@@ -676,7 +715,18 @@ function tripal_core_load_gff3_feature($organism,$cvterm,$uniquename,$name,
       print "Skipping existing feature: '$uniquename' ($cvterm->name).\n";
       return 0;
    }
+   // get the newly added feature
    $feature = db_fetch_object(db_query($feature_sql,$organism->organism_id,$uniquename,$cvterm->cvterm_id));
+
+   // add the analysisfeature entry only if no matching record exists (an empty result array is falsy)
+   $af_values = array('analysis_id' => $analysis_id, 'feature_id' => $feature->feature_id);
+   if(!tripal_core_chado_select('analysisfeature',array('analysisfeature_id'),$af_values)){
+      if(!tripal_core_chado_insert('analysisfeature',$af_values)){
+         print "ERROR: could not add analysisfeature record: $analysis_id, $feature->feature_id\n";
+         return 0;
+      }
+   }
+
    // now that we've added the feature let's reset it's uniquename to be
    // the feature id
    if($generate_name){

+ 11 - 3
tripal_feature/tripal_feature.admin.inc

@@ -6,9 +6,17 @@
  * @return HTML Formatted text
  */
 function tripal_feature_module_description_page() {
-  $text = '';
-  
-  $text .= '<h3>Description:</h3>';
+
+  $text = '<h3>Tripal Feature Administrative Tools Quick Links:</h3>';  // first use: assign (not append) so $text is initialized
+  $text .= "<ul>";
+  $text .= "<li><a href=\"".url("admin/tripal/tripal_feature/configuration") . "\">Feature Configuration</a></li>";
+  $text .= "<li><a href=\"".url("admin/tripal/tripal_feature/fasta_loader"). "\">Import a multi-FASTA file</a></li>";
+  $text .= "<li><a href=\"".url("admin/tripal/tripal_feature/gff3_load"). "\">Import a GFF3 file</a></li>";
+  $text .= "<li><a href=\"".url("admin/tripal/tripal_feature/aggregate"). "\">Feature Relationship Aggegators</a></li>";
+  $text .= "</ul>";
+ 
+
+  $text .= '<h3>Module Description:</h3>';
   $text .= '<p>TODO: Basic Description of this module including mention/link to the chado module</p>';
 
   $text .= '<h3>Post Installation Instructions:</h3>';

+ 4 - 2
tripal_feature/tripal_feature.info

@@ -4,9 +4,11 @@ description = A module for interfacing the GMOD chado database with Drupal, prov
 core = 6.x
 project = tripal_feature
 package = Tripal
-dependencies[] = tripal_core
-dependencies[] = tripal_organism
 dependencies[] = search
 dependencies[] = path
+dependencies[] = tripal_core
+dependencies[] = tripal_db
 dependencies[] = tripal_cv
+dependencies[] = tripal_organism
+dependencies[] = tripal_analysis
 version = "6.x-0.2b-m0.2"

+ 9 - 3
tripal_feature/tripal_feature.install

@@ -13,6 +13,10 @@ function tripal_feature_install(){
   
    // add the materialized view
    tripal_feature_add_organism_count_mview();
+
+   // Register this module as having an analysis type
+   tripal_analysis_register_child('tripal_feature');
+
 }
 /*******************************************************************************
 *  Update for Drupal 6.x, Tripal 0.2b, Feature Module 0.2
@@ -29,15 +33,16 @@ function tripal_feature_update_6000(){
    return $ret;
 }
 /*******************************************************************************
-*  Update for Drupal 6.x, Tripal 0.2b, Feature Module 0.2
-*  This update adjusts the materialized view by adding a 'cvterm_id' column
-*/
 
+*/
 function tripal_feature_update_6300(){
    // add the relationship aggregator table to the database
    $schema = tripal_feature_get_schemas('tripal_feature_relagg');
    $ret = array();
    db_create_table($ret, 'tripal_feature_relagg', $schema['tripal_feature_relagg']);
+
+   // Register this module as having an analysis type
+   tripal_analysis_register_child('tripal_feature');
    
    return $ret;
 }
@@ -169,6 +174,7 @@ function tripal_feature_get_schemas ($table = NULL){
 function tripal_feature_requirements($phase) {
    $requirements = array();
    if ($phase == 'install') {
+      // make sure the core module is installed...
       if (!function_exists('tripal_create_moddir')) {
          $requirements ['tripal_feature'] = array(
             'title' => "tripal_feature",

+ 1 - 1
tripal_feature/tripal_feature.module

@@ -132,7 +132,7 @@ function tripal_feature_menu() {
      'type' => MENU_NORMAL_ITEM,
    );
    $items['admin/tripal/tripal_feature/configuration'] = array(
-     'title' => 'Configuration',
+     'title' => 'Feature Configuration',
      'description' => 'Settings for Chado Features',
      'page callback' => 'drupal_get_form',
      'page arguments' => array('tripal_feature_admin'),