|
@@ -516,158 +516,188 @@ function tripal_analysis_interpro_parseHTMLFile ($analysis_id, $interprofile, $p
|
|
|
$interval = intval($no_iterations * 0.01);
|
|
|
$idx_iterations = 0;
|
|
|
|
|
|
- // Processed the tables
|
|
|
- foreach ($tables as $table) {
|
|
|
- if ($table->getName() == 'table') {
|
|
|
- // Set job status
|
|
|
- $idx_iterations ++;
|
|
|
- if ($idx_iterations % $interval == 0) {
|
|
|
- $percentage = (int) ($idx_iterations / $no_iterations * 100);
|
|
|
- db_set_active($previous_db);
|
|
|
- tripal_job_set_progress($job_id, $percentage);
|
|
|
- $previous_db = db_set_active('chado');
|
|
|
- print $percentage."% ";
|
|
|
- }
|
|
|
-
|
|
|
- // Get the first row and match its name with the feature name
|
|
|
- $firsttd = $table->children()->children()->children();
|
|
|
- $feature_id = 0;
|
|
|
- foreach($firsttd as $b) {
|
|
|
- foreach($b->children() as $a) {
|
|
|
- if ($a->getName() == 'a') {
|
|
|
- // Remove _ORF from the sequence name
|
|
|
- $seqname = preg_replace('/^(.+?)_\d_.+/', "$1", $a);
|
|
|
-
|
|
|
- // Find out how many features match this uniquename
|
|
|
- $sql = "SELECT count(feature_id) FROM {feature} ".
|
|
|
- "WHERE uniquename = '%s' ";
|
|
|
- $no_features = db_result(db_query($sql, $seqname));
|
|
|
-
|
|
|
- // If there is only one match, get the feature_id
|
|
|
- if ($no_features == 1) {
|
|
|
- $sql = "SELECT feature_id FROM {feature} ".
|
|
|
- "WHERE uniquename = '%s' ";
|
|
|
- $feature_id = db_result(db_query($sql, $seqname));
|
|
|
-
|
|
|
- // If the uniquename matches more than one features then skip and print 'Ambiguous'
|
|
|
- } else if ($no_features > 1) {
|
|
|
- fwrite($log, "Ambiguous: ".$seqname." matches more than one feature and is not processed.\n");
|
|
|
- continue;
|
|
|
-
|
|
|
- // If the uniquename did not match, skip and print 'Failed'
|
|
|
- } else {
|
|
|
- fwrite($log, "Failed: ".$seqname."\n");
|
|
|
- }
|
|
|
-
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- // Successfully matched. print 'Succeeded'. Add analysis_id and
|
|
|
- // feature_id to analysisfeature. Add the table as XML to analysisfeatureprop
|
|
|
- if ($feature_id) {
|
|
|
-
|
|
|
- //------------------------------------
|
|
|
- // Insert into analysisfeature table
|
|
|
- //------------------------------------
|
|
|
- $sql = "INSERT INTO {analysisfeature} (feature_id, analysis_id) ".
|
|
|
- "VALUES (%d, %d)";
|
|
|
- db_query ($sql, $feature_id, $analysis_id);
|
|
|
-
|
|
|
- // If a matched feature is found, write to log.
|
|
|
- fwrite($log, "Succeeded: ".$seqname." => feature id:".$feature_id);
|
|
|
-
|
|
|
- // Get the higest rank for this feature_id in analysisfeatureprop table.
|
|
|
- // If the value of the inserting content is not duplicate, add it to
|
|
|
- // analysisfeaturepro with 'higest_rank + 1'
|
|
|
- $sql = "SELECT MAX(rank) FROM {analysisfeatureprop} AFP ".
|
|
|
- "INNER JOIN analysisfeature AF ON AF.analysisfeature_id = AFP.analysisfeature_id ".
|
|
|
- "WHERE feature_id=%d ".
|
|
|
- "AND analysis_id=%d ".
|
|
|
- "AND type_id=%d ";
|
|
|
-
|
|
|
- $afp = db_fetch_object(db_query($sql, $feature_id, $analysis_id, $type_id));
|
|
|
- $hi_rank = 0;
|
|
|
- if ($afp) {
|
|
|
- $hi_rank = $afp->max + 1;
|
|
|
+ // Processed the tables
|
|
|
+ foreach ($tables as $table) {
|
|
|
+ //if (preg_match('/No hits reported/', $table->asXML()) ) {
|
|
|
+ //print "skipping this table b/c no hits are reported\n";
|
|
|
+ //}
|
|
|
+ // make sure we are looking at a table and it's not an empty table
|
|
|
+ if ($table->getName() == 'table' && !preg_match('/No hits reported/', $table->asXML()) ) {
|
|
|
+ $idx_iterations ++;
|
|
|
+ if ($idx_iterations % $interval == 0) {
|
|
|
+ $percentage = (int) ($idx_iterations / $no_iterations * 100);
|
|
|
+ db_set_active($previous_db);
|
|
|
+ tripal_job_set_progress($job_id, $percentage);
|
|
|
+ $previous_db = db_set_active('chado');
|
|
|
+ print $percentage."% ";
|
|
|
}
|
|
|
-
|
|
|
- // Get the newly inserted analysisfeature_id
|
|
|
- $sql = "SELECT analysisfeature_id FROM {analysisfeature} WHERE feature_id = %d AND analysis_id = %d";
|
|
|
- $analysisfeature_id = db_result(db_query($sql, $feature_id, $analysis_id));
|
|
|
- //------------------------------------------------------------
|
|
|
- // Insert interpro html tags into analysisfeatureprop table
|
|
|
- //------------------------------------------------------------
|
|
|
- // Before inserting, make sure it's not a duplicate
|
|
|
- $sql = "SELECT value FROM {analysisfeatureprop} WHERE analysisfeature_id = %d AND type_id = %d";
|
|
|
- $result = db_query($sql, $analysisfeature_id, $type_id);
|
|
|
- $duplicate = 0;
|
|
|
- while ($afp_value = db_fetch_object($result)) {
|
|
|
- if ($table->asXML() == $afp_value->value) {
|
|
|
- $duplicate = 1;
|
|
|
- }
|
|
|
- }
|
|
|
- if (!$duplicate) {
|
|
|
- $sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank)".
|
|
|
- "VALUES (%d, %d, '%s', %d)";
|
|
|
- db_query($sql, $analysisfeature_id, $type_id, $table->asXML(), $hi_rank);
|
|
|
- fwrite($log, " (Insert)\n"); // write to log
|
|
|
- } else {
|
|
|
- fwrite($log, " (Skipped)\n");
|
|
|
- }
|
|
|
-
|
|
|
- // Parse GO terms. Make sure GO database schema is installed in chado
|
|
|
- $go_db_id = db_result(db_query("SELECT db_id FROM {db} WHERE name='GO'"));
|
|
|
- if (!$go_db_id) {
|
|
|
- print 'GO schema not installed in chado. GO terms are not processed.';
|
|
|
+
|
|
|
+ // Set job status
|
|
|
+ // Get the first row and match its name with the feature name
|
|
|
+ $firsttd = $table->children()->children()->children();
|
|
|
+ $feature_id = 0;
|
|
|
+ foreach($firsttd as $b) {
|
|
|
+ foreach($b->children() as $a) {
|
|
|
+ if ($a->getName() == 'a') {
|
|
|
+ // Remove _ORF from the sequence name
|
|
|
+ $seqname = preg_replace('/^(.+?)_\d_.+/', "$1", $a);
|
|
|
+ print "seqname is $seqname\n";
|
|
|
+
|
|
|
+ // Find out how many features match this uniquename
|
|
|
+ $sql = "SELECT count(feature_id) FROM {feature} ".
|
|
|
+ "WHERE uniquename = '%s' ";
|
|
|
+ $no_features = db_result(db_query($sql, $seqname));
|
|
|
+
|
|
|
+ // If there is only one match, get the feature_id
|
|
|
+ if ($no_features == 1) {
|
|
|
+ $sql = "SELECT feature_id FROM {feature} ".
|
|
|
+ "WHERE uniquename = '%s' ";
|
|
|
+ $feature_id = db_result(db_query($sql, $seqname));
|
|
|
+ print "\tfeature id is $feature_id\n";
|
|
|
+
|
|
|
+ // If the uniquename matches more than one features then skip and print 'Ambiguous'
|
|
|
+ } else if ($no_features > 1) {
|
|
|
+ fwrite($log, "Ambiguous: ".$seqname." matches more than one feature and is not processed.\n");
|
|
|
+ continue;
|
|
|
+
|
|
|
+ // If the uniquename did not match, skip and print 'Failed'
|
|
|
+ } else {
|
|
|
+ fwrite($log, "Failed: ".$seqname."\n");
|
|
|
+ }
|
|
|
+
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
- if ($go_db_id && $parsego) {
|
|
|
- $trs = $table->children();
|
|
|
- foreach ($trs as $tr) {
|
|
|
- $tds = $tr->children();
|
|
|
- foreach($tds as $td) {
|
|
|
- $gotags = $td->children();
|
|
|
- foreach ($gotags as $gotag) {
|
|
|
- // Look for 'GO:accession#'
|
|
|
- if (preg_match("/^.*?GO:(\d+).*$/", $gotag, $matches)) {
|
|
|
-
|
|
|
- // Find cvterm_id for the matched GO term
|
|
|
- $sql = "SELECT cvterm_id FROM {cvterm} CVT
|
|
|
- INNER JOIN dbxref DBX ON CVT.dbxref_id = DBX.dbxref_id
|
|
|
- WHERE DBX.accession = '%s' AND DBX.db_id = %d";
|
|
|
- $goterm_id = db_result(db_query($sql, $matches[1], $go_db_id));
|
|
|
-
|
|
|
- //-------------------------------------------
|
|
|
- // Insert GO terms into feature_cvterm table
|
|
|
- //-------------------------------------------
|
|
|
- // Default pub_id = 1 (NULL) was used
|
|
|
- $sql = "INSERT INTO {feature_cvterm} (feature_id, cvterm_id, pub_id)
|
|
|
- VALUES (%d, %d, 1)";
|
|
|
- db_query($sql, $feature_id, $goterm_id);
|
|
|
-
|
|
|
- //------------------------------------------------
|
|
|
- // Insert GO terms into analysisfeatureprop table
|
|
|
- //------------------------------------------------
|
|
|
- $sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank) ".
|
|
|
- "VALUES (%d, %d, '%s', 0)";
|
|
|
- db_query($sql, $analysisfeature_id, $goterm_id, $matches[1]);
|
|
|
-
|
|
|
+
|
|
|
+ // Successfully matched. print 'Succeeded'. Add analysis_id and
|
|
|
+ // feature_id to analysisfeature. Add the table as XML to analysisfeatureprop
|
|
|
+ if ($feature_id) {
|
|
|
+
|
|
|
+ //------------------------------------
|
|
|
+ // Cleanse unwanted rows from the table
|
|
|
+ //------------------------------------
|
|
|
+
|
|
|
+ $parent_row = "/<tr><td valign=\"top\"><b>Parent<\/b><\/td>\s*<td valign=\"top\">\s*no.*?parent<\/td>\s*<\/tr>/";
|
|
|
+ $children_row = "/<tr><td valign=\"top\"><b>Children<\/b><\/td>\s*<td valign=\"top\">\s*no.*?children<\/td>\s*<\/tr>/";
|
|
|
+ $found_row = "/<tr><td valign=\"top\"><b>Found.*?in<\/b><\/td>\s*<td valign=\"top\">\s*no.*?entries<\/td>\s*<\/tr>/";
|
|
|
+ $contains_row = "/<tr><td valign=\"top\"><b>Contains<\/b><\/td>\s*<td valign=\"top\">\s*no.*?entries<\/td>\s*<\/tr>/";
|
|
|
+ $go_row = "/<tr><td valign=\"top\"><b>GO.*?terms<\/b><\/td>\s*<td valign=\"top\">\s*none<\/td>\s*<\/tr>/";
|
|
|
+
|
|
|
+ $table_txt = $table->asXML();
|
|
|
+ $table_txt = preg_replace($parent_row, "", $table_txt);
|
|
|
+ $table_txt = preg_replace($children_row, "", $table_txt);
|
|
|
+ $table_txt = preg_replace($found_row, "", $table_txt);
|
|
|
+ $table_txt = preg_replace($contains_row, "", $table_txt);
|
|
|
+ $table_txt = preg_replace($go_row, "", $table_txt);
|
|
|
+
|
|
|
+ //print "----------------------------\n";
|
|
|
+ //print "old: ".$table->asXML()."\n\n\n";
|
|
|
+ //print "----------------------------\n";
|
|
|
+ //print "Fixed: $table_txt\n";
|
|
|
+ //print "----------------------------\n";
|
|
|
+
|
|
|
+ //------------------------------------
|
|
|
+ // Insert into analysisfeature table
|
|
|
+ //------------------------------------
|
|
|
+ $sql = "INSERT INTO {analysisfeature} (feature_id, analysis_id) ".
|
|
|
+ "VALUES (%d, %d)";
|
|
|
+ db_query ($sql, $feature_id, $analysis_id);
|
|
|
+
|
|
|
+ // If a matched feature is found, write to log.
|
|
|
+ fwrite($log, "Succeeded: ".$seqname." => feature id:".$feature_id);
|
|
|
+
|
|
|
+ // Get the highest rank for this feature_id in analysisfeatureprop table.
|
|
|
+ // If the value of the inserting content is not duplicate, add it to
|
|
|
+ // analysisfeatureprop with 'highest_rank + 1'
|
|
|
+ $sql = "SELECT MAX(rank) FROM {analysisfeatureprop} AFP ".
|
|
|
+ "INNER JOIN analysisfeature AF ON AF.analysisfeature_id = AFP.analysisfeature_id ".
|
|
|
+ "WHERE feature_id=%d ".
|
|
|
+ "AND analysis_id=%d ".
|
|
|
+ "AND type_id=%d ";
|
|
|
+
|
|
|
+ $afp = db_fetch_object(db_query($sql, $feature_id, $analysis_id, $type_id));
|
|
|
+ $hi_rank = 0;
|
|
|
+ if ($afp) {
|
|
|
+ $hi_rank = $afp->max + 1;
|
|
|
+ }
|
|
|
+ // Get the newly inserted analysisfeature_id
|
|
|
+ $sql = "SELECT analysisfeature_id FROM {analysisfeature} WHERE feature_id = %d AND analysis_id = %d";
|
|
|
+ $analysisfeature_id = db_result(db_query($sql, $feature_id, $analysis_id));
|
|
|
+ //------------------------------------------------------------
|
|
|
+ // Insert interpro html tags into analysisfeatureprop table
|
|
|
+ //------------------------------------------------------------
|
|
|
+ // Before inserting, make sure it's not a duplicate
|
|
|
+ $sql = "SELECT value FROM {analysisfeatureprop} WHERE analysisfeature_id = %d AND type_id = %d";
|
|
|
+ $result = db_query($sql, $analysisfeature_id, $type_id);
|
|
|
+ $duplicate = 0;
|
|
|
+ while ($afp_value = db_fetch_object($result)) {
|
|
|
+ if ($table_txt == $afp_value->value) {
|
|
|
+ $duplicate = 1;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (!$duplicate) {
|
|
|
+ $sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank)".
|
|
|
+ "VALUES (%d, %d, '%s', %d)";
|
|
|
+ db_query($sql, $analysisfeature_id, $type_id, $table_txt, $hi_rank);
|
|
|
+ fwrite($log, " (Insert)\n"); // write to log
|
|
|
+ print "\twriting table\n";
|
|
|
+ } else {
|
|
|
+ fwrite($log, " (Skipped)\n");
|
|
|
+ print "\tskipping table - dup\n";
|
|
|
+ }
|
|
|
+
|
|
|
+ // Parse GO terms. Make sure GO database schema is installed in chado
|
|
|
+ $go_db_id = db_result(db_query("SELECT db_id FROM {db} WHERE name='GO'"));
|
|
|
+ if (!$go_db_id) {
|
|
|
+ print 'GO schema not installed in chado. GO terms are not processed.';
|
|
|
+ }
|
|
|
+ if ($go_db_id && $parsego) {
|
|
|
+ $trs = $table->children();
|
|
|
+ foreach ($trs as $tr) {
|
|
|
+ $tds = $tr->children();
|
|
|
+ foreach($tds as $td) {
|
|
|
+ $gotags = $td->children();
|
|
|
+ foreach ($gotags as $gotag) {
|
|
|
+ // Look for 'GO:accession#'
|
|
|
+ if (preg_match("/^.*?GO:(\d+).*$/", $gotag, $matches)) {
|
|
|
+
|
|
|
+ // Find cvterm_id for the matched GO term
|
|
|
+ $sql = "SELECT cvterm_id FROM {cvterm} CVT
|
|
|
+ INNER JOIN dbxref DBX ON CVT.dbxref_id = DBX.dbxref_id
|
|
|
+ WHERE DBX.accession = '%s' AND DBX.db_id = %d";
|
|
|
+ $goterm_id = db_result(db_query($sql, $matches[1], $go_db_id));
|
|
|
+
|
|
|
+ //-------------------------------------------
|
|
|
+ // Insert GO terms into feature_cvterm table
|
|
|
+ //-------------------------------------------
|
|
|
+ // Default pub_id = 1 (NULL) was used
|
|
|
+ $sql = "INSERT INTO {feature_cvterm} (feature_id, cvterm_id, pub_id)
|
|
|
+ VALUES (%d, %d, 1)";
|
|
|
+ db_query($sql, $feature_id, $goterm_id);
|
|
|
+
|
|
|
+ //------------------------------------------------
|
|
|
+ // Insert GO terms into analysisfeatureprop table
|
|
|
+ //------------------------------------------------
|
|
|
+ $sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank) ".
|
|
|
+ "VALUES (%d, %d, '%s', 0)";
|
|
|
+ db_query($sql, $analysisfeature_id, $goterm_id, $matches[1]);
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
-
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- db_set_active ($previous_db); // Use drupal database
|
|
|
- print "Done.\nSuccessful and failed entries have been saved in the log file:\n $logfile\n";
|
|
|
-
|
|
|
- fwrite($log, "\n");
|
|
|
- fclose($log);
|
|
|
- return;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ db_set_active ($previous_db); // Use drupal database
|
|
|
+ print "Done.\nSuccessful and failed entries have been saved in the log file:\n $logfile\n";
|
|
|
+
|
|
|
+ fwrite($log, "\n");
|
|
|
+ fclose($log);
|
|
|
+ return;
|
|
|
}
|
|
|
|
|
|
+
|
|
|
/*******************************************************************************
|
|
|
* tripal_analysis_interpro_nodeapi()
|
|
|
* HOOK: Implementation of hook_nodeapi()
|