Переглянути джерело

Add best hit homology report to the blast module.

ccheng 14 роки тому
батько
коміт
7c6b381d41

+ 51 - 8
tripal_analysis_blast/tripal_analysis_blast.install

@@ -17,14 +17,8 @@ function tripal_analysis_blast_install() {
    // Register the analysis type
    // Register the analysis type
    tripal_analysis_register_child('tripal_analysis_blast');
    tripal_analysis_register_child('tripal_analysis_blast');
    
    
-   // Add cvterm 'analysis_blast_output_iteration_hits' for inserting into featureprop table
-   tripal_add_cvterms('analysis_blast_output_iteration_hits', 'Hits of a blast '.
-      'output iteration. Each iteration corresponds to a chado feature, and is '.
-      'the content between <iteration> and </iteration> tags in the blast xml '.
-      'output file. This cvterm represents all hits in the iteration');
-   // Add cveterm 'analysis_blast_settings' for inserting into analysisprop table   
-   tripal_add_cvterms('analysis_blast_settings', 'Settings of a blast analysis, '.
-      'including db_id, output file, and run parameters separated by a bar |');
+   // Add cvterms
+   tripal_analysis_blast_add_cvterms();
    
    
    // Create a tripal_analysis_blast table to store parsers
    // Create a tripal_analysis_blast table to store parsers
    drupal_install_schema('tripal_analysis_blast');
    drupal_install_schema('tripal_analysis_blast');
@@ -144,3 +138,52 @@ function tripal_analysis_blast_requirements($phase) {
    }
    }
    return $requirements;
    return $requirements;
 }
 }
+
+/*******************************************************************************
+ * Update script 6000: register the blast cvterms on an existing installation
+ *
+ * Delegates to tripal_analysis_blast_add_cvterms() so that sites which
+ * installed the module before the best hit cvterms existed get them too.
+ * NOTE(review): the name follows Drupal's hook_update_N() convention - confirm
+ * it is picked up by update.php.
+ *
+ * @return
+ *   An array with a single '#finished' entry set to 1
+ */
+function tripal_analysis_blast_update_6000(){
+
+   // the newly introduced cvterms are registered by the shared helper
+   tripal_analysis_blast_add_cvterms();
+
+   return array('#finished' => 1);
+}
+
+/*******************************************************************************
+ * Register every cvterm used by the blast module
+ *
+ * Called from both the install function and the 6000 update script so the
+ * two code paths always register the same set of terms. Each term is passed
+ * to tripal_add_cvterms() as a (name, definition) pair, in the order listed
+ * below.
+ *
+ * The best hit definitions are written as single-line strings: a string
+ * literal that spans several source lines would store the raw line break and
+ * the indentation inside the definition text.
+ */
+function tripal_analysis_blast_add_cvterms () {
+
+   // shared ending of the six best hit definitions
+   $besthit = ' of the best hit associated with a feature and a blast analysis';
+
+   // cvterm name => definition
+   $cvterms = array(
+      // stored in the analysisfeatureprop table by the XML parser
+      'analysis_blast_output_iteration_hits' => 'Hits of a blast '.
+         'output iteration. Each iteration corresponds to a chado feature, and is '.
+         'the content between <iteration> and </iteration> tags in the blast xml '.
+         'output file. This cvterm represents all hits in the iteration',
+
+      // written by the best hit parser for the homology report
+      'analysis_blast_besthit_query'       => 'query name'.$besthit,
+      'analysis_blast_besthit_match'       => 'match name'.$besthit,
+      'analysis_blast_besthit_description' => 'description'.$besthit,
+      'analysis_blast_besthit_evalue'      => 'evalue'.$besthit,
+      'analysis_blast_besthit_identity'    => 'identity (%)'.$besthit,
+      'analysis_blast_besthit_length'      => 'length'.$besthit,
+
+      // stored in the analysisprop table
+      'analysis_blast_settings' => 'Settings of a blast analysis, '.
+         'including db_id, output file, and run parameters separated by a bar |',
+   );
+
+   foreach ($cvterms as $name => $definition) {
+      tripal_add_cvterms($name, $definition);
+   }
+}

+ 488 - 15
tripal_analysis_blast/tripal_analysis_blast.module

@@ -32,6 +32,14 @@ function tripal_analysis_blast_menu() {
       'access arguments' => array('administer site configuration'),
       'access arguments' => array('administer site configuration'),
       'type' => MENU_CALLBACK
       'type' => MENU_CALLBACK
 	);
 	);
+	$items['tripal_blast_report'] = array(
+      'title' => t('Homology Report'),
+      'page callback' => 'tripal_get_blast_report',
+      'page arguments' => array(1,2,3,4,5),
+      'access arguments' => array('access chado_analysis_blast content'),
+      'type' => MENU_CALLBACK,
+	   'file' => 'tripal_analysis_blast_htmlreport.inc'
+	);
 	return $items;
 	return $items;
 }
 }
 
 
@@ -421,7 +429,7 @@ function tripal_analysis_blast_get_settings() {
 /*******************************************************************************
 /*******************************************************************************
  * Parse Blast XML Output file into analysisfeatureprop table
  * Parse Blast XML Output file into analysisfeatureprop table
  */
  */
-function tripal_analysis_blast_parseXMLFile ($analysis_id, $blastdb, $blastfile, $job_id) {
+function tripal_analysis_blast_parseXMLFile ($analysis_id, $blastdb, $blastfile, $no_parsed, $job_id) {
 
 
 	// Prepare log
 	// Prepare log
 	$filename = preg_replace("/.*\/(.*)/", "$1", $blastfile);
 	$filename = preg_replace("/.*\/(.*)/", "$1", $blastfile);
@@ -433,7 +441,12 @@ function tripal_analysis_blast_parseXMLFile ($analysis_id, $blastdb, $blastfile,
 		// Parsing started
 		// Parsing started
 		print "Parsing File:".$blastfile." ...\n";
 		print "Parsing File:".$blastfile." ...\n";
 		fwrite($log, date("D M j G:i:s Y").". Loading $blastfile\n");
 		fwrite($log, date("D M j G:i:s Y").". Loading $blastfile\n");
-
+		if ($no_parsed == 'all') {
+			print "Parsing all hits...\n";
+		} else {
+			print "Parsing top $no_parsed hits...\n";
+		}
+		
 		// Get cvterm_id for 'analysis_blast_output_iteration_hits' which is required
 		// Get cvterm_id for 'analysis_blast_output_iteration_hits' which is required
 		// for inserting into the analysisfeatureprop table
 		// for inserting into the analysisfeatureprop table
 		$previous_db = tripal_db_set_active('chado'); // use chado database
 		$previous_db = tripal_db_set_active('chado'); // use chado database
@@ -536,7 +549,26 @@ function tripal_analysis_blast_parseXMLFile ($analysis_id, $blastdb, $blastfile,
 
 
 									$result = db_query($sql, $feature_id, $analysis_id, $type_id);
 									$result = db_query($sql, $feature_id, $analysis_id, $type_id);
 									$analysisfeatureprop = db_fetch_object($result);
 									$analysisfeatureprop = db_fetch_object($result);
-									$xml_content = "<Iteration>\n".$featurename_xml."\n".$iteration_tags->asXML()."\n</Iteration>";
+									$xml_content =  "<Iteration>\n".$featurename_xml."\n";
+									// parse all hits
+									if ($no_parsed == 'all') {
+										$xml_content .= $iteration_tags->asXML();
+									
+									// parse only top hits
+									} else {
+										$counter = 0;
+										$xml_content .= "<Iteration_hits>\n";
+										foreach ($iteration_tags->children() As $hit) {
+											if ($counter < $no_parsed) {
+												$xml_content .= $hit->asXML();
+											} else {
+												break;
+											}
+											$counter ++;
+										}
+										$xml_content .= "</Iteration_hits>";
+									}
+									$xml_content .= "\n</Iteration>";
 
 
 									// If this Iteration_hits already exists, update it
 									// If this Iteration_hits already exists, update it
 									if ($analysisfeatureprop) {
 									if ($analysisfeatureprop) {
@@ -581,6 +613,11 @@ function tripal_analysis_blast_parseXMLFile ($analysis_id, $blastdb, $blastfile,
 		$pattern = sql_regcase($blastfile . "/*.XML");
 		$pattern = sql_regcase($blastfile . "/*.XML");
 		$total_files = count(glob($pattern));
 		$total_files = count(glob($pattern));
 		print "$total_files file(s) to be parsed.\n";
 		print "$total_files file(s) to be parsed.\n";
+		if ($no_parsed == 'all') {
+			print "Parsing all hits...\n";
+		} else {
+			print "Parsing top $no_parsed hits...\n";
+		}
 		$interval = intval($total_files * 0.01);
 		$interval = intval($total_files * 0.01);
 		$no_file = 0;
 		$no_file = 0;
 
 
@@ -692,7 +729,26 @@ function tripal_analysis_blast_parseXMLFile ($analysis_id, $blastdb, $blastfile,
 
 
 											$result = db_query($sql, $feature_id, $analysis_id, $type_id);
 											$result = db_query($sql, $feature_id, $analysis_id, $type_id);
 											$analysisfeatureprop = db_fetch_object($result);
 											$analysisfeatureprop = db_fetch_object($result);
-											$xml_content = "<Iteration>\n".$featurename_xml."\n".$iteration_tags->asXML()."\n</Iteration>";
+											$xml_content =  "<Iteration>\n".$featurename_xml."\n";
+											// parse all hits
+											if ($no_parsed == 'all') {
+												$xml_content .= $iteration_tags->asXML();
+									
+											// parse only top hits
+											} else {
+												$counter = 0;
+												$xml_content .= "<Iteration_hits>\n";
+												foreach ($iteration_tags->children() As $hit) {
+													if ($counter < $no_parsed) {
+														$xml_content .= $hit->asXML();
+													} else {
+														break;
+													}
+													$counter ++;
+												}
+												$xml_content .= "</Iteration_hits>";
+											}
+											$xml_content .= "\n</Iteration>";
 
 
 											// If this Iteration_hits already exists, update it
 											// If this Iteration_hits already exists, update it
 											if ($analysisfeatureprop) {
 											if ($analysisfeatureprop) {
@@ -788,12 +844,86 @@ function chado_analysis_blast_form ($node){
       '#type' => 'hidden',
       '#type' => 'hidden',
       '#default_value' => $node->title,
       '#default_value' => $node->title,
 	);
 	);
-	
-   // add in the analysis fields
-   $analysis_form_elements = chado_analysis_form(null);
-   foreach ($analysis_form_elements as $key => $element){
-      $form[$key] = $element;
-   }
+	$form['analysisname']= array(
+      '#type' => 'textfield',
+      '#title' => t('Analysis Name'),
+      '#required' => FALSE,
+      '#default_value' => $node->analysisname,
+      '#weight' => 1
+	);
+	$form['program']= array(
+      '#type' => 'textfield',
+      '#title' => t('Program'),
+      '#required' => TRUE,
+      '#default_value' => $node->program,
+      '#weight' => 2
+	);
+	$form['programversion']= array(
+      '#type' => 'textfield',
+      '#title' => t('Program Version'),
+      '#required' => TRUE,
+      '#default_value' => $node->programversion,
+      '#weight' => 3
+	);
+	$form['algorithm']= array(
+      '#type' => 'textfield',
+      '#title' => t('Algorithm'),
+      '#required' => FALSE,
+      '#default_value' => $node->algorithm,
+      '#weight' => 4
+	);
+	$form['sourcename']= array(
+      '#type' => 'textfield',
+      '#title' => t('Source Name'),
+      '#required' => FALSE,
+      '#default_value' => $node->sourcename,
+      '#weight' => 5
+	);
+	$form['sourceversion']= array(
+      '#type' => 'textfield',
+      '#title' => t('Source Version'),
+      '#required' => FALSE,
+      '#default_value' => $node->sourceversion,
+      '#weight' => 6
+	);
+	$form['sourceuri']= array(
+      '#type' => 'textfield',
+      '#title' => t('Source URI'),
+      '#required' => FALSE,
+      '#default_value' => $node->sourceuri,
+      '#weight' => 7
+	);
+	// Get time saved in chado
+	$default_time = $node->timeexecuted;
+	$year = preg_replace("/^(\d+)-\d+-\d+ .*/", "$1", $default_time);
+	$month = preg_replace("/^\d+-0?(\d+)-\d+ .*/", "$1", $default_time);
+	$day = preg_replace("/^\d+-\d+-0?(\d+) .*/", "$1", $default_time);
+	// If the time is not set, use current time
+	if (!$default_time) {
+		$default_time = time();
+		$year = format_date($default_time, 'custom', 'Y');
+		$month = format_date($default_time, 'custom', 'n');
+		$day = format_date($default_time, 'custom', 'j');
+	}
+	$form['timeexecuted']= array(
+      '#type' => 'date',
+      '#title' => t('Time Executed'),
+      '#required' => TRUE,
+      '#default_value' => array(
+         'year' => $year,
+         'month' => $month,
+         'day' => $day,
+	),
+      '#weight' => 8
+	);
+	$form['description']= array(
+      '#type' => 'textarea',
+      '#rows' => 15,
+      '#title' => t('Description and/or Program Settings'),
+      '#required' => FALSE,
+      '#default_value' => check_plain($node->description),
+      '#weight' => 9
+	);
 
 
 	// Blast specific settings
 	// Blast specific settings
 	if (preg_match("/.*\|.*\|.*/",$node->blastdb)) {
 	if (preg_match("/.*\|.*\|.*/",$node->blastdb)) {
@@ -838,15 +968,32 @@ function chado_analysis_blast_form ($node){
       '#description' => t('The xml output file generated by blast in full path.'),
       '#description' => t('The xml output file generated by blast in full path.'),
       '#default_value' => $node->blastfile,
       '#default_value' => $node->blastfile,
 	);
 	);
+	
+	$form['blast']['no_parsed'] = array(
+	  '#title' => t('Number of hits to be parsed'),
+      '#type' => 'textfield',
+      '#description' => t("The number of hits to be parsed. Tripal will parse only top 10 hits if you input '10'' in this field."),
+      '#default_value' => 'all',
+	);
+	
 	$form['blast']['blastjob'] = array(
 	$form['blast']['blastjob'] = array(
       '#type' => 'checkbox',
       '#type' => 'checkbox',
       '#title' => t('Submit a job to parse the xml output into analysisfeatureprop table'),
       '#title' => t('Submit a job to parse the xml output into analysisfeatureprop table'),
       '#description' => t('Note: features associated with the blast results must '.
       '#description' => t('Note: features associated with the blast results must '.
                           'exist in chado before parsing the file. Otherwise, blast '.
                           'exist in chado before parsing the file. Otherwise, blast '.
                           'results that cannot be linked to a feature will be '.
                           'results that cannot be linked to a feature will be '.
-                          'discarded. Also, Triapl Blast module needs to be enabled.'),
+                          'discarded. '),
       '#default_value' => $node->blastjob
       '#default_value' => $node->blastjob
 	);
 	);
+	
+	$form['blast']['blastbesthit'] = array(
+      '#type' => 'checkbox',
+      '#title' => t('Submit a job to parse the best hit from the  xml output so a best hit report can be created'),
+      '#description' => t('Note: the xml parser for this blast database  needs to be set first.  '.
+                          'Otherwise, the hit description may contain undesirable content.'),
+      '#default_value' => $node->blastbesthit
+	);
+	
 	$form['blast']['blastparameters'] = array(
 	$form['blast']['blastparameters'] = array(
       '#title' => t('Parameters'),
       '#title' => t('Parameters'),
       '#type' => 'textfield',
       '#type' => 'textfield',
@@ -987,9 +1134,24 @@ function chado_analysis_blast_insert($node){
             "VALUES (%d, %d, '%s')";
             "VALUES (%d, %d, '%s')";
 	$blastsettings = $node->blastdb."|".$node->blastfile."|".$node->blastparameters;
 	$blastsettings = $node->blastdb."|".$node->blastfile."|".$node->blastparameters;
 	db_query($sql, $analysis_id, $type_id, $blastsettings);
 	db_query($sql, $analysis_id, $type_id, $blastsettings);
-
-	tripal_db_set_active($previous_db);  // switch back to drupal database
-
+	
+	    // Get cvterm_id for 'analysis_type'
+	    $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
+                "INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
+                "WHERE CVT.name = 'analysis_type' ".
+                "AND CV.name = 'tripal'";
+	    $type_id = db_result(db_query($sql));
+    
+        //---------------------------------------------------
+	    // Insert into chado {analysisprop} table
+        //---------------------------------------------------
+	    $sql = "INSERT INTO {analysisprop} (analysis_id, type_id, value) ".
+                "VALUES (%d, %d, '%s')";
+	    $analysis_type = "tripal_analysis_blast";
+	    db_query($sql, $analysis_id, $type_id, $analysis_type);
+    
+	    tripal_db_set_active($previous_db);  // switch back to drupal database
+	    	
     //---------------------------------------------------
     //---------------------------------------------------
 	// Add a job if the user wants to parse the xml output
 	// Add a job if the user wants to parse the xml output
     //---------------------------------------------------
     //---------------------------------------------------
@@ -997,6 +1159,7 @@ function chado_analysis_blast_insert($node){
 		$job_args[0] = $analysis_id;
 		$job_args[0] = $analysis_id;
 		$job_args[1] = $node->blastdb;
 		$job_args[1] = $node->blastdb;
 		$job_args[2] = $node->blastfile;
 		$job_args[2] = $node->blastfile;
+		$job_args[3] = $node->no_parsed;
 		if (is_readable($node->blastfile)) {
 		if (is_readable($node->blastfile)) {
 			$fname = preg_replace("/.*\/(.*)/", "$1", $node->blastfile);
 			$fname = preg_replace("/.*\/(.*)/", "$1", $node->blastfile);
 			tripal_add_job("Parse blast: $fname",'tripal_analysis_blast',
 			tripal_add_job("Parse blast: $fname",'tripal_analysis_blast',
@@ -1005,7 +1168,12 @@ function chado_analysis_blast_insert($node){
 			drupal_set_message("Can not open blast output file. Job not scheduled.");
 			drupal_set_message("Can not open blast output file. Job not scheduled.");
 		}
 		}
 	}
 	}
-
+	if($node->blastbesthit) {
+			$j_args[0] = $analysis_id;	
+			tripal_add_job("Parse best hit: $node->analysisname",'tripal_analysis_blast',
+                           'tripal_analysis_blast_parse_best_hit', $j_args, $user->uid);			
+		}
+		
 	// next add the item to the drupal table
 	// next add the item to the drupal table
 	$sql = "INSERT INTO {chado_analysis} (nid, vid, analysis_id) ".
 	$sql = "INSERT INTO {chado_analysis} (nid, vid, analysis_id) ".
             "VALUES (%d, %d, %d)";
             "VALUES (%d, %d, %d)";
@@ -1120,6 +1288,7 @@ function chado_analysis_blast_update($node){
 			$job_args[0] = $analysis_id;
 			$job_args[0] = $analysis_id;
 			$job_args[1] = $node->blastdb;
 			$job_args[1] = $node->blastdb;
 			$job_args[2] = $node->blastfile;
 			$job_args[2] = $node->blastfile;
+			$job_args[3] = $node->no_parsed;
 			if (is_readable($node->blastfile)) {
 			if (is_readable($node->blastfile)) {
 				$fname = preg_replace("/.*\/(.*)/", "$1", $node->blastfile);
 				$fname = preg_replace("/.*\/(.*)/", "$1", $node->blastfile);
 				tripal_add_job("Parse blast: $fname",'tripal_analysis_blast',
 				tripal_add_job("Parse blast: $fname",'tripal_analysis_blast',
@@ -1128,6 +1297,11 @@ function chado_analysis_blast_update($node){
 				drupal_set_message("Can not open blast output file. Job not scheduled.");
 				drupal_set_message("Can not open blast output file. Job not scheduled.");
 			}
 			}
 		}
 		}
+		if($node->blastbesthit) {
+			$j_args[0] = $analysis_id;	
+			tripal_add_job("Parse best hit: $node->analysisname",'tripal_analysis_blast',
+                           'tripal_analysis_blast_parse_best_hit', $j_args, $user->uid);			
+		}
 
 
 		// Create a title for the analysis node using the unique keys so when the
 		// Create a title for the analysis node using the unique keys so when the
 		// node is saved, it will have a title
 		// node is saved, it will have a title
@@ -1507,3 +1681,302 @@ function tripal_analysis_blast_get_result_object($xml_string,$db,$max,$feature_i
 	$blast_object->hits_array = $hits_array;
 	$blast_object->hits_array = $hits_array;
 	return $blast_object;
 	return $blast_object;
 }
 }
+
+/********************************************************************************
+ * Parse the best hit to generate the best hit homology report
+ *
+ * For every feature linked to the analysis, the blast XML stored under the
+ * 'analysis_blast_output_iteration_hits' property is loaded and its first hit
+ * is examined. Six 'analysis_blast_besthit_*' properties (query, match,
+ * description, evalue, identity, length) are then inserted into, or updated
+ * in, the analysisfeatureprop table. Features without stored XML, with
+ * unparsable XML or without any hit are skipped.
+ *
+ * @param $analysis_id
+ *   The analysis_id of the blast analysis in chado
+ * @param $job_id
+ *   Optional. Id of the job whose progress should be updated. Progress is
+ *   only reported through tripal_job_set_progress() when an id is given;
+ *   the percentage is always printed.
+ */
+function tripal_analysis_blast_parse_best_hit ($analysis_id, $job_id = NULL) {
+
+	$previous_db = tripal_db_set_active('chado');
+
+	// Select all features for this blast analysis, and save them to the 'featureSet' array
+	$sql = "SELECT feature_id
+	             FROM {analysisfeature} AF
+	             WHERE analysis_id = %d";
+	$result = db_query($sql, $analysis_id);
+	$featureSet = array ();
+	while ($feature = db_fetch_object($result)) {
+		$featureSet [] = $feature->feature_id;
+	}
+	$counter = count($featureSet);
+
+	// Get the blast settings of the analysis; the db_id is the first field
+	// of the bar separated value
+	$sql = "SELECT value
+                FROM {analysis}  A
+                INNER JOIN {analysisprop} AP ON  A.analysis_id = AP.analysis_id
+                WHERE A.analysis_id = %d
+                AND type_id= (SELECT cvterm_id
+                                           FROM {cvterm}
+                                           WHERE name = 'analysis_blast_settings')";
+	$analysis = db_fetch_object(db_query($sql, $analysis_id));
+	$db_id = 0;
+	if ($analysis) {
+		$blastsettings = explode("|", $analysis->value);
+		$db_id = $blastsettings [0];
+	}
+
+	// Get the xml description parser using db_id (the parser table is
+	// queried after switching back from chado)
+	tripal_db_set_active($previous_db);
+	$sql = "SELECT * FROM {tripal_analysis_blast} WHERE db_id = %d";
+	$parser = db_fetch_object(db_query($sql, $db_id));
+
+	// set default description parser if regular expressions have not been specified
+	$is_genbank = $parser ? $parser->genbank_style : 0;
+	$regex_hit_id = '/^(.*?)\s.*$/';
+	if ($parser && $parser->regex_hit_id) {
+		$regex_hit_id = '/'.$parser->regex_hit_id.'/';
+	}
+	$regex_hit_def = '/^.*?\s(.*)$/';
+	if ($parser && $parser->regex_hit_def) {
+		$regex_hit_def = '/'.$parser->regex_hit_def.'/';
+	}
+
+	// Statements that do not change between features
+	$sql_hits = "SELECT value
+		             FROM {analysisfeatureprop} AFP
+		             INNER JOIN {analysisfeature} AF ON AFP.analysisfeature_id = AF.analysisfeature_id
+		             WHERE analysis_id = %d
+		             AND feature_id = %d
+		             AND type_id = (SELECT cvterm_id FROM cvterm WHERE name='analysis_blast_output_iteration_hits' AND cv_id = (SELECT cv_id FROM cv WHERE name='tripal'))";
+	$sql_af = "SELECT analysisfeature_id FROM {analysisfeature} WHERE analysis_id = %d AND feature_id = %d";
+
+	// Report progress about every 1%. The interval must never be 0 (less
+	// than 100 features), otherwise the modulo below divides by zero.
+	$interval = max(1, intval($counter * 0.01));
+	for ($i = 0; $i < $counter; $i ++) {
+
+		if ($i != 0 && $i % $interval == 0) {
+			$percentage = (int) ($i / $counter * 100);
+			if ($job_id) {
+				tripal_job_set_progress($job_id, $percentage);
+			}
+			print $percentage."% ";
+		}
+
+		// Everything below reads from / writes to chado. The previous
+		// connection is restored at the end of EVERY iteration, including
+		// the ones that skip the feature.
+		$previous_db = tripal_db_set_active('chado');
+
+		$xml_string = db_result(db_query($sql_hits, $analysis_id, $featureSet[$i]));
+		$xml_output = $xml_string ? simplexml_load_string($xml_string) : FALSE;
+
+		$query = '';
+		$best_hit = NULL;
+		if ($xml_output !== FALSE) {
+			// This is for the file parsed by the old parser: the stored
+			// element itself holds the hits
+			$iteration = $xml_output;
+
+			// new XML file parser has added the feature name within <Iteration_query-def> tags.
+			if ($xml_output->getName() == 'Iteration') {
+				$iteration = NULL;
+				foreach ($xml_output->children() as $xml_tag) {
+					if ($xml_tag->getName() == 'Iteration_query-def') {
+						$query = (string) $xml_tag;
+					} else if ($xml_tag->getName() == 'Iteration_hits') {
+						$iteration = $xml_tag;
+					}
+				}
+			}
+
+			// Only the first hit of the iteration is used for the report
+			if ($iteration !== NULL) {
+				foreach ($iteration->children() as $hit) {
+					$best_hit = $hit;
+					break;
+				}
+			}
+		}
+
+		if ($best_hit !== NULL) {
+			// the query name is the first word of the query definition
+			$query = explode(" ", $query);
+			$query = $query [0];
+
+			$best = _tripal_analysis_blast_extract_best_hit($best_hit, $is_genbank, $regex_hit_id, $regex_hit_def);
+
+			// shorten the mantissa of an evalue in scientific notation to one decimal
+			$best_evalue = $best['evalue'];
+			$e_digit = explode("e-", $best_evalue);
+			if (count($e_digit) == 2) {
+				$best_evalue = number_format($e_digit [0],1)."e-".$e_digit[1];
+			}
+
+			// identity is reported as a percentage of the alignment length
+			$percent_identity = number_format(0, 1);
+			if ($best['length'] > 0) {
+				$percent_identity = number_format($best['identity'] / $best['length'] * 100, 1);
+			}
+
+			// Get analysisfeature_id
+			$af_id = db_result(db_query($sql_af, $analysis_id, $featureSet[$i]));
+			if ($af_id) {
+				_tripal_analysis_blast_save_besthit_prop($af_id, 'analysis_blast_besthit_query', $query);
+				_tripal_analysis_blast_save_besthit_prop($af_id, 'analysis_blast_besthit_match', $best['match']);
+				_tripal_analysis_blast_save_besthit_prop($af_id, 'analysis_blast_besthit_description', $best['description']);
+				_tripal_analysis_blast_save_besthit_prop($af_id, 'analysis_blast_besthit_evalue', $best_evalue);
+				_tripal_analysis_blast_save_besthit_prop($af_id, 'analysis_blast_besthit_identity', $percent_identity);
+				_tripal_analysis_blast_save_besthit_prop($af_id, 'analysis_blast_besthit_length', $best['length']);
+			}
+		}
+
+		tripal_db_set_active($previous_db);
+	}
+	print "100%\n";
+	return;
+}
+
+/********************************************************************************
+ * Helper: pull the values needed for the best hit report out of one <Hit>
+ *
+ * @param $hit
+ *   SimpleXMLElement of a single hit
+ * @param $is_genbank
+ *   If TRUE the match name is the content of <Hit_id> and the description is
+ *   the content of <Hit_def>. Otherwise both are extracted from <Hit_def>
+ *   with the two regular expressions.
+ * @param $regex_hit_id
+ *   Delimited regular expression whose first group is the match name
+ * @param $regex_hit_def
+ *   Delimited regular expression whose first group is the description
+ *
+ * @return
+ *   An array with the keys 'match', 'description', 'evalue', 'identity' and
+ *   'length'. The last three are taken from the first HSP that provides them
+ *   and are 0 when no HSP does.
+ */
+function _tripal_analysis_blast_extract_best_hit ($hit, $is_genbank, $regex_hit_id, $regex_hit_def) {
+
+	$best = array(
+		'match' => '',
+		'description' => '',
+		'evalue' => NULL,
+		'identity' => NULL,
+		'length' => NULL,
+	);
+	// HSP tag name => key in $best
+	$hsp_fields = array(
+		'Hsp_evalue' => 'evalue',
+		'Hsp_identity' => 'identity',
+		'Hsp_align-len' => 'length',
+	);
+
+	foreach ($hit->children() as $hit_tag) {
+		$element_name = $hit_tag->getName();
+		if ($element_name == 'Hit_id') {
+			if ($is_genbank) {
+				$best['match'] = (string) $hit_tag;
+			}
+		} else if ($element_name == 'Hit_def') {
+			$hit_def = (string) $hit_tag;
+			if ($is_genbank) {
+				$best['description'] = $hit_def;
+			} else {
+				$best['match'] = preg_replace($regex_hit_id,"$1",$hit_def);
+				$best['description'] = preg_replace($regex_hit_def,"$1",$hit_def);
+			}
+		} else if ($element_name == 'Hit_hsps') {
+			// now run through each HSP for this hit; the first value found
+			// for a field is kept as the best one
+			foreach ($hit_tag->children() as $hsp) {
+				foreach ($hsp->children() as $hsp_info) {
+					$hsp_name = $hsp_info->getName();
+					if (isset($hsp_fields[$hsp_name]) && $best[$hsp_fields[$hsp_name]] === NULL) {
+						$best[$hsp_fields[$hsp_name]] = (string) $hsp_info;
+					}
+				}
+			}
+		}
+	}
+
+	// fall back to 0 for values no HSP provided
+	foreach ($hsp_fields as $key) {
+		if ($best[$key] === NULL) {
+			$best[$key] = 0;
+		}
+	}
+	return $best;
+}
+
+/********************************************************************************
+ * Helper: insert or update one best hit property of an analysisfeature
+ *
+ * The chado database must be the active database when this is called.
+ * The cvterm is looked up by name in the 'tripal' cv; nothing is written if
+ * it does not exist. An existing property row for the analysisfeature and
+ * cvterm is updated, otherwise a new row with rank 0 is inserted.
+ *
+ * @param $analysisfeature_id
+ *   The analysisfeature_id the property belongs to
+ * @param $cvterm_name
+ *   Name of the cvterm, e.g. 'analysis_blast_besthit_query'
+ * @param $value
+ *   The property value to store
+ */
+function _tripal_analysis_blast_save_besthit_prop ($analysisfeature_id, $cvterm_name, $value) {
+
+	// Get type_id
+	$sql = "SELECT cvterm_id FROM {cvterm} WHERE name = '%s' AND cv_id = (SELECT cv_id FROM {cv} WHERE name = 'tripal')";
+	$type_id = db_result(db_query($sql, $cvterm_name));
+	if (!$type_id) {
+		return;
+	}
+
+	$sql = "SELECT analysisfeatureprop_id FROM {analysisfeatureprop} WHERE analysisfeature_id = %d AND type_id = %d";
+	$afp_id = db_result(db_query($sql, $analysisfeature_id, $type_id));
+
+	// Insert only if this property not exists. Otherwise, update instead
+	if (!$afp_id) {
+		$sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank) VALUES (%d, %d, '%s', 0)";
+		db_query($sql, $analysisfeature_id, $type_id, $value);
+	} else {
+		$sql = "UPDATE {analysisfeatureprop} SET analysisfeature_id = %d, type_id = %d, value = '%s', rank = 0 WHERE analysisfeatureprop_id = %d";
+		db_query($sql, $analysisfeature_id, $type_id, $value, $afp_id);
+	}
+}

+ 243 - 0
tripal_analysis_blast/tripal_analysis_blast_htmlreport.inc

@@ -0,0 +1,243 @@
+<?php
+
+/*******************************************************************************
+ * Build the HTML for one page of the blast "best hit" homology report.
+ *
+ * The report lists, for every feature of a blast analysis, the best hit's
+ * query name, match name, description, e-value, percent identity and length,
+ * as stored in the analysisfeatureprop table under the
+ * 'analysis_blast_besthit_*' cvterms. Column headers link back to the report
+ * with a different sort column/direction; below the table are links to change
+ * the number of records per page and a page selector that calls the
+ * tripal_update_best_hit_report() JavaScript function.
+ *
+ * @param $analysis_id
+ *   analysis_id of the blast analysis to report on
+ * @param $currentpage
+ *   1-based number of the page to display (values below 1 are treated as 1)
+ * @param $sort
+ *   Column to sort by: 0 = query, 1 = match name, 2 = description,
+ *   3 = e-value, 4 = %identity, 5 = length. Any other value sorts by query.
+ * @param $descending
+ *   1 to sort in descending order; any other value sorts ascending
+ * @param $per_page
+ *   Number of records per page (values below 1 fall back to 10)
+ *
+ * @return
+ *   The report as an HTML string
+ */
+function tripal_get_blast_report ($analysis_id, $currentpage, $sort, $descending, $per_page) {
+	// NOTE(review): the access check below is disabled in the original code;
+	// confirm whether it should be re-enabled.
+	/*
+	if (!tripal_check_permission_by_analysis_id($analysis_id)) {
+		$html_out = "Access denied. You are not authorized to access this report.";
+		return $html_out;
+	}
+   */
+
+	// All arguments are numeric. Casting them keeps arbitrary text out of the
+	// generated links and out of the inline onChange JavaScript below.
+	$analysis_id = (int) $analysis_id;
+	$sort = (int) $sort;
+	$descending = (int) $descending;
+	$currentpage = (int) $currentpage;
+	if ($currentpage < 1) {
+		$currentpage = 1;
+	}
+	$per_page = (int) $per_page;
+	if ($per_page < 1) {
+		// Avoid a division by zero when computing the number of pages
+		$per_page = 10;
+	}
+
+	// Arrow shown next to the header of the column the report is sorted by
+	$symbol = "&and;";
+	$sort_key = "ASC";
+	if ($descending == 1) {
+		$symbol = "&or;";
+		$sort_key = "DESC";
+	}
+
+	// Report columns in display order. 'numeric' marks the columns whose stored
+	// string value must be cast to a number before SQL sorting.
+	$columns = array(
+		0 => array('term' => 'analysis_blast_besthit_query', 'label' => 'Query', 'numeric' => FALSE),
+		1 => array('term' => 'analysis_blast_besthit_match', 'label' => 'Match Name', 'numeric' => FALSE),
+		2 => array('term' => 'analysis_blast_besthit_description', 'label' => 'Description', 'numeric' => FALSE),
+		3 => array('term' => 'analysis_blast_besthit_evalue', 'label' => 'E-value', 'numeric' => TRUE),
+		4 => array('term' => 'analysis_blast_besthit_identity', 'label' => '%Identity', 'numeric' => TRUE),
+		5 => array('term' => 'analysis_blast_besthit_length', 'label' => 'Length', 'numeric' => TRUE),
+	);
+	$sort_column = isset($columns[$sort]) ? $columns[$sort] : $columns[0];
+
+	// ---- Everything read from chado is fetched in this section ----
+	$previous_db = tripal_db_set_active('chado');
+
+	// Get analysis information including 'Time', 'Name', and 'DB Settings'
+	$sql = "SELECT value, name, to_char(timeexecuted, 'MM-DD-YYYY') AS time
+                FROM {analysis}  A
+                INNER JOIN {analysisprop} AP ON  A.analysis_id = AP.analysis_id 
+                WHERE A.analysis_id = %d
+                AND type_id= (SELECT cvterm_id 
+                                           FROM {cvterm} 
+                                           WHERE name = 'analysis_blast_settings')";
+	$analysis = db_fetch_object(db_query($sql, $analysis_id));
+	$analysis_name = '';
+	$analysis_time = '';
+	$db_id = 0;
+	if ($analysis) {
+		$analysis_name = $analysis->name;
+		$analysis_time = $analysis->time;
+		// The blast settings are bar separated; the first field is the db_id
+		$blastsettings = explode("|", $analysis->value);
+		$db_id = $blastsettings[0];
+	}
+
+	// Get db 'urlprefix', used to link each match name to its source database
+	$sql = "SELECT urlprefix FROM {db} WHERE db_id=%d";
+	$urlprefix = db_result(db_query($sql, $db_id));
+
+	// Get all analysisfeature_id's sorted by the selected header for this blast analysis
+	$sql = "SELECT AFP.analysisfeature_id
+	             FROM {analysisfeature} AF 
+	             INNER JOIN {analysisfeatureprop} AFP ON AF.analysisfeature_id = AFP.analysisfeature_id
+	             WHERE analysis_id = %d
+	             AND AFP.type_id = (SELECT cvterm_id FROM {cvterm} WHERE name = '%s' AND cv_id = (SELECT cv_id FROM {cv} WHERE name = 'tripal'))
+	             ORDER BY value ";
+	// Convert string to number for evalue, %identity, and length before SQL sorting
+	if ($sort_column['numeric']) {
+		$sql .= "::numeric  ";
+	}
+	$sql .= $sort_key;
+	$result = db_query($sql, $analysis_id, $sort_column['term']);
+	$analysisfeatureSet = array();
+	while ($feature = db_fetch_object($result)) {
+		$analysisfeatureSet[] = $feature->analysisfeature_id;
+	}
+	$counter = count($analysisfeatureSet);
+
+	// Fetch the property values of the records on the requested page only.
+	// The upper bound is capped at $counter so the last (partial) page does not
+	// read past the end of $analysisfeatureSet.
+	$first_item = ($currentpage - 1) * $per_page;
+	$last_item = min($first_item + $per_page, $counter);
+	$sql_prop = "SELECT value
+	             FROM {analysisfeatureprop} 
+	             WHERE analysisfeature_id = %d 
+	             AND type_id = (SELECT cvterm_id FROM {cvterm} WHERE name = '%s' AND cv_id = (SELECT cv_id FROM {cv} WHERE name = 'tripal'))";
+	$sql_fid = "SELECT feature_id FROM {analysisfeature} WHERE analysisfeature_id = %d";
+	$rows = array();
+	for ($i = $first_item; $i < $last_item; $i ++) {
+		$af_id = $analysisfeatureSet[$i];
+		$row = array();
+		foreach ($columns as $index => $column) {
+			$row[$index] = db_result(db_query($sql_prop, $af_id, $column['term']));
+		}
+		$row['fid'] = db_result(db_query($sql_fid, $af_id));
+		$rows[] = $row;
+	}
+
+	tripal_db_set_active($previous_db);
+	// ---- Back on the Drupal database (needed by the queries and url() calls below) ----
+
+	// Get analysis node id
+	$ana_nid = db_result(db_query("SELECT nid FROM {chado_analysis} WHERE analysis_id = %d", $analysis_id));
+	$ana_url = url("node/".$ana_nid);
+
+	// Get pager info
+	$no_page = (int) ceil($counter / $per_page);
+
+	$url = url(drupal_get_path('theme', 'tripal')."/images/ajax-loader.gif");
+	$html_out = "<div id=\"tripal_ajaxLoading\" style=\"display:none\">".
+                   "<div id=\"loadingText\">Loading...</div>".
+                   "<img src=\"$url\"></div>";
+	// The element ids/classes below are kept exactly as they were (including the
+	// 'tripal_analysi_blast-report-table' spelling) since stylesheets or scripts
+	// outside this file may refer to them.
+	$html_out .= "<div id=\"blast-hits-report\">
+                             <strong>Analysis Date: </strong>".check_plain($analysis_time)." (<a href=\"$ana_url\">".check_plain($analysis_name)."</a>)<br>
+                             There are <strong>$counter</strong> records. <div id=\"tripal_blast_report_current_page\">page <strong>$currentpage</strong> of <strong>$no_page</strong></div>
+	                          <table id=\"tripal_analysi_blast-report-table\" class=\"tripal_analysis_blast-table tripal-table tripal-table-horz\">
+	                            <tr class=\"tripal_analysis_blast-table-odd-row tripal-table-odd-row\">";
+
+	// Header row: every header links to page 1 of the report sorted by that
+	// column. Clicking the column that is already sorted ascending flips it to
+	// descending; in every other case the link sorts ascending.
+	foreach ($columns as $index => $column) {
+		$html_out .= "<th nowrap>";
+		if ($sort == $index) {
+			$html_out .= $symbol;
+		}
+		$flip = ($descending == 0 && $sort == $index) ? "1" : "0";
+		$url_path = url("tripal_blast_report/$analysis_id/1/$index/$flip/$per_page");
+		$html_out .= "<a href=\"$url_path\">".$column['label']."</a></th>";
+	}
+	$html_out .= "</tr>";
+
+	// Table content. Values come from the database, so they are escaped before
+	// being written into the page.
+	foreach ($rows as $j => $row) {
+		// Records without a best hit query name are not shown
+		if (!$row[0]) {
+			continue;
+		}
+		$class = 'tripal_analysis_blast-table-odd-row tripal-table-odd-row';
+		if ($j % 2 == 0) {
+			$class = 'tripal_analysis_blast-table-even-row tripal-table-even-row';
+		}
+		$q_url = url("ID".$row['fid']);
+		$match = check_plain($row[1]);
+		$match_url = check_plain($urlprefix.$row[1]);
+		$html_out .= "<tr class=\"$class\">
+		                           <td nowrap><a href=\"$q_url\">".check_plain($row[0])."</a></td>
+		                           <td nowrap><a href=\"$match_url\" target=\"_blank\">$match</a></td>
+		                           <td>".check_plain($row[2])."</td>
+		                           <td nowrap>".check_plain($row[3])."</td>
+		                           <td nowrap>".check_plain($row[4])."</td>
+		                           <td nowrap>".check_plain($row[5])."</td>
+		                         </tr>";
+	}
+
+	// The number of items to show on the page: the current size is shown in
+	// bold, the others link to page 1 of the report with that size.
+	$path = "tripal_blast_report/$analysis_id/1/$sort/$descending/";
+	$size_links = array();
+	foreach (array(10, 20, 50, 100, 200) as $size) {
+		if ($per_page == $size) {
+			$size_links[] = "<strong>$size</strong>";
+		} else {
+			$url_path = url($path.$size);
+			$size_links[] = "<a href=\"$url_path\">$size</a>";
+		}
+	}
+	$html_out .= '</table>
+	                           <div id="tripal_blast_report_per_page">Show '.implode(' | ', $size_links).' records per page
+	                           </div>';
+
+	// Make AJAX call to update the page which user selected
+	$html_out .= '<div id="tripal_blast_report_pager">page
+	                             <select id="tripal_blast_report_page_selector" onChange="tripal_update_best_hit_report(this,'.$analysis_id.','.$sort.','.$descending.','.$per_page.')">';
+	for ($page = 1; $page <= $no_page; $page ++) {
+		// Pre-select the page being displayed so the selector matches the table
+		$selected = ($page == $currentpage) ? ' selected="selected"' : '';
+		$html_out .= "<option value=\"$page\"$selected>$page</option>";
+	}
+	$html_out .= '</select></div>
+	                        </div>';
+	return $html_out;
+}