Browse Source

Removed ORF link from HTML when it is being parsed.

mestato 14 years ago
parent
commit
678799b710
1 changed files with 55 additions and 42 deletions
  1. 55 42
      tripal_analysis_interpro/tripal_analysis_interpro.module

+ 55 - 42
tripal_analysis_interpro/tripal_analysis_interpro.module

@@ -495,6 +495,7 @@ function tripal_analysis_interpro_parseHTMLFile ($analysis_id, $interprofile, $p
           "WHERE CVT.name = 'analysis_interpro_output_hit' ".
           "AND CV.name = 'tripal'";
 	$type_id = db_result(db_query($sql));
+	print "cvterm_id for analysis_interpro_output_iteration_hits is $type_id\n";
 
 	// Load the HTML file and convert it into XML for loading
 	$dom = new domDocument;
@@ -516,8 +517,8 @@ function tripal_analysis_interpro_parseHTMLFile ($analysis_id, $interprofile, $p
 	$interval = intval($no_iterations * 0.01);
 	$idx_iterations = 0;
 
-    // Processed the tables
-    foreach ($tables as $table) {
+	// Processed the tables
+	foreach ($tables as $table) {
         //if (preg_match('/No hits reported/', $table->asXML()) ) {
             //print "skipping this table b/c no hits are reported\n";
         //}
@@ -542,29 +543,29 @@ function tripal_analysis_interpro_parseHTMLFile ($analysis_id, $interprofile, $p
                         // Remove _ORF from the sequence name
                         $seqname = preg_replace('/^(.+?)_\d_.+/', "$1", $a);
                         print "seqname is $seqname\n";
-
+    
                         // Find out how many features match this uniquename
                         $sql = "SELECT count(feature_id) FROM {feature} ".
                         "WHERE uniquename = '%s' ";
                         $no_features = db_result(db_query($sql, $seqname));
-
+						    
                         // If there is only one match, get the feature_id
                         if ($no_features == 1) {
                             $sql = "SELECT feature_id FROM {feature} ".
                                     "WHERE uniquename = '%s' ";
                             $feature_id = db_result(db_query($sql, $seqname));
                             print "\tfeature id is $feature_id\n";
-
+    
                             // If the uniquename matches more than one features then skip and print 'Ambiguous'
                         } else if ($no_features > 1) {
                             fwrite($log, "Ambiguous: ".$seqname." matches more than one feature and is not processed.\n");
                             continue;
-
+    
                         // If the uniquename did not match, skip and print 'Failed'
                         } else {
                             fwrite($log, "Failed: ".$seqname."\n");
                         }
-
+    
                     }
                 }
             }
@@ -582,7 +583,7 @@ function tripal_analysis_interpro_parseHTMLFile ($analysis_id, $interprofile, $p
                 $found_row    = "/<tr><td valign=\"top\"><b>Found.*?in<\/b><\/td>\s*<td valign=\"top\">\s*no.*?entries<\/td>\s*<\/tr>/";
                 $contains_row = "/<tr><td valign=\"top\"><b>Contains<\/b><\/td>\s*<td valign=\"top\">\s*no.*?entries<\/td>\s*<\/tr>/";
                 $go_row       = "/<tr><td valign=\"top\"><b>GO.*?terms<\/b><\/td>\s*<td valign=\"top\">\s*none<\/td>\s*<\/tr>/";
-
+    
                 $table_txt = $table->asXML();
                 $table_txt = preg_replace($parent_row, "", $table_txt);
                 $table_txt = preg_replace($children_row, "", $table_txt);
@@ -590,6 +591,12 @@ function tripal_analysis_interpro_parseHTMLFile ($analysis_id, $interprofile, $p
                 $table_txt = preg_replace($contains_row, "", $table_txt);
                 $table_txt = preg_replace($go_row, "", $table_txt);
 
+                //------------------------------------
+                // Cleanse unwanted ORF link from table 
+                //------------------------------------
+                $orf_link = "/<b><a href=\"\/iprscan\/wget.*?\">(.*?)<\/a><\/b>/";
+                $table_txt = preg_replace($orf_link, "$1", $table_txt);
+
                 //print "----------------------------\n";
                 //print "old: ".$table->asXML()."\n\n\n";
                 //print "----------------------------\n";
@@ -597,20 +604,30 @@ function tripal_analysis_interpro_parseHTMLFile ($analysis_id, $interprofile, $p
                 //print "----------------------------\n";
 
                 //------------------------------------
-                // Insert into analysisfeature table
+                // If this feature has already been associated with this analysis, do not reinsert
+                // Otherwise, Insert into analysisfeature table
                 //------------------------------------
-                $sql = "INSERT INTO {analysisfeature} (feature_id, analysis_id) ".
+                $sql = "Select analysisfeature_id as id from {analysisfeature} where feature_id = %d and analysis_id = %d";
+                $analysisfeature =  db_fetch_object(db_query($sql, $feature_id, $analysis_id));
+                if($analysisfeature){ $analysisfeature_id = $analysisfeature->id; }
+                if(!$analysisfeature_id){
+                    print "inserting analysisfeature\n";
+                   $sql = "INSERT INTO {analysisfeature} (feature_id, analysis_id) ".
                         "VALUES (%d, %d)";
-                db_query ($sql, $feature_id, $analysis_id);
+                   db_query ($sql, $feature_id, $analysis_id);
+                   $sql = "Select analysisfeature_id from {analysisfeature} where feature_id = %d and analysis_id = %d";
+                   $analysisfeature =  db_fetch_object(db_query($sql, $feature_id, $analysis_id));
+                   $analysisfeature_id = $analysisfeature->id;
+                }
+
+                print "analysisfeature_id is $analysisfeature_id (analysis_id = $analysis_id; feature_id = $feature_id)\n";
 
-                // If a matched feature is found, write to log.
-                fwrite($log, "Succeeded: ".$seqname." => feature id:".$feature_id);
 
                 // Get the higest rank for this feature_id in analysisfeatureprop table.
                 // If the value of the inserting content is not duplicate, add it to  
                 // analysisfeaturepro with 'higest_rank + 1'
                 $sql = "SELECT MAX(rank) FROM {analysisfeatureprop} AFP ".
-                        "INNER JOIN analysisfeature AF ON AF.analysisfeature_id = AFP.analysisfeature_id ".
+                        "INNER JOIN analysisfeature AF ON AF.analysisfeature_id = AFP.analysisfeature_id ".       
                         "WHERE feature_id=%d ".
                         "AND analysis_id=%d ".
                         "AND type_id=%d ";
@@ -620,32 +637,30 @@ function tripal_analysis_interpro_parseHTMLFile ($analysis_id, $interprofile, $p
                 if ($afp) {
                     $hi_rank = $afp->max + 1;
                 }
-                // Get the newly inserted analysisfeature_id
-                $sql = "SELECT analysisfeature_id FROM {analysisfeature} WHERE feature_id = %d AND analysis_id = %d";
-                $analysisfeature_id = db_result(db_query($sql, $feature_id, $analysis_id));
-                //------------------------------------------------------------
-                // Insert interpro html tags into analysisfeatureprop table
-                //------------------------------------------------------------
-                // Before inserting, make sure it's not a duplicate
-                $sql = "SELECT value FROM {analysisfeatureprop} WHERE analysisfeature_id = %d AND type_id = %d";
-                $result = db_query($sql, $analysisfeature_id, $type_id);
-                $duplicate = 0;
-                while ($afp_value = db_fetch_object($result)) {
-                    if ($table_txt == $afp_value->value) {
+            
+				//------------------------------------------------------------
+				// Insert interpro html tags into analysisfeatureprop table
+				//------------------------------------------------------------
+				// Before inserting, make sure it's not a duplicate
+				$sql = "SELECT value FROM {analysisfeatureprop} WHERE analysisfeature_id = %d AND type_id = %d";
+				$result = db_query($sql, $analysisfeature_id, $type_id);
+				$duplicate = 0;
+				while ($afp_value = db_fetch_object($result)) {
+				    if ($table_txt == $afp_value->value) {
                         $duplicate = 1;
-                    }
-                }
-                if (!$duplicate) {
-                    $sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank)".
+				    }
+				}
+				if (!$duplicate) {
+				    $sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank)".
                             "VALUES (%d, %d, '%s', %d)";
-                    db_query($sql, $analysisfeature_id, $type_id, $table_txt, $hi_rank);
-                    fwrite($log, " (Insert)\n"); // write to log
+				    db_query($sql, $analysisfeature_id, $type_id, $table_txt, $hi_rank);
+				    fwrite($log, " (Insert)\n"); // write to log
                     print "\twriting table\n";
-                } else {
-                   fwrite($log, " (Skipped)\n");
+				} else {
+				   fwrite($log, " (Skipped)\n");
                     print "\tskipping table - dup\n";
-                }
-
+				}
+				
                 // Parse GO terms. Make sure GO database schema is installed in chado
                 $go_db_id = db_result(db_query("SELECT db_id FROM {db} WHERE name='GO'"));
                 if (!$go_db_id) {
@@ -660,13 +675,13 @@ function tripal_analysis_interpro_parseHTMLFile ($analysis_id, $interprofile, $p
                             foreach ($gotags as $gotag) {
                                 // Look for 'GO:accession#'
                                 if (preg_match("/^.*?GO:(\d+).*$/", $gotag, $matches)) {
-
+                        	
                                     // Find cvterm_id for the matched GO term
                                     $sql = "SELECT cvterm_id FROM {cvterm} CVT 
                                             INNER JOIN dbxref DBX ON CVT.dbxref_id = DBX.dbxref_id
                                             WHERE DBX.accession = '%s' AND DBX.db_id = %d";
                                     $goterm_id = db_result(db_query($sql, $matches[1], $go_db_id));
-
+                           
                                     //-------------------------------------------
                                     // Insert GO terms into feature_cvterm table
                                     //-------------------------------------------
@@ -691,13 +706,12 @@ function tripal_analysis_interpro_parseHTMLFile ($analysis_id, $interprofile, $p
     }
     db_set_active ($previous_db); // Use drupal database
     print "Done.\nSuccessful and failed entries have been saved in the log file:\n $logfile\n";
-
+	
     fwrite($log, "\n");
     fclose($log);
     return;
 }
 
-
 /*******************************************************************************
  * tripal_analysis_interpro_nodeapi()
  * HOOK: Implementation of hook_nodeapi()
@@ -824,9 +838,8 @@ function tripal_get_interpro_results($feature_id){
                  INNER JOIN analysisfeature AF on AF.analysisfeature_id = AFP.analysisfeature_id
                  WHERE AF.analysis_id = %d
                  AND AF.feature_id = %d
-                 AND AFP.type_id = %d
                 ";
-         $interpro_results = db_query($sql, $ana->aid, $feature_id, $type_id);
+         $interpro_results = db_query($sql, $ana->aid, $feature_id);
       	 while ($afp = db_fetch_object($interpro_results)) {
       	 	$content .= $afp->afpvalue;
       	 }