t('Analysis: Interpro'),
      'module' => 'chado_analysis_interpro',
      'description' => t('An interpro analysis from the chado database'),
      'has_title' => FALSE,
      'title_label' => t('Analysis: Interpro'),
      'has_body' => FALSE,
      'body_label' => t('Interpro Analysis Description'),
      'locked' => TRUE
   );
   return $nodes;
}
/*******************************************************************************
 * Block callback (Drupal 6 style $op / $delta / $edit signature).
 *
 * 'list' declares a single, uncached block keyed 'tai_results'.
 * 'view' renders that block through the 'tripal_analysis_interpro_results'
 * theme hook, but only on node/<nid> pages and only for users holding the
 * 'access chado_analysis_interpro content' permission. In every other case
 * nothing is returned.
 *
 * @param $op     Operation requested ('list' and 'view' are handled).
 * @param $delta  Block identifier; only 'tai_results' produces content.
 * @param $edit   Unused.
 */
function tripal_analysis_interpro_block($op = 'list', $delta = 0, $edit=array()){
   switch($op) {
      case 'list':
         $blocks['tai_results']['info'] = t('Tripal InterProScan Analysis Results');
         $blocks['tai_results']['cache'] = BLOCK_NO_CACHE;
         return $blocks;

      case 'view':
         if (user_access('access chado_analysis_interpro content') &&
             arg(0) == 'node' && is_numeric(arg(1))) {
            $nid = arg(1);
            $node = node_load($nid);

            $block = array();
            switch($delta){
               case 'tai_results':
                  $block['subject'] = t('InterProScan Results');
                  $block['content'] = theme('tripal_analysis_interpro_results', $node);
                  break;
               default :
            }
            return $block;
         }
   }
}
/*******************************************************************************
 * Provide a Interpro Analysis form
 *
 * Builds the node edit form: a hidden title, one text field per analysis
 * attribute, the execution date, a free-text description and a fieldset with
 * the InterProScan specific settings (output file, parse-job checkbox, GO
 * checkbox and parameters).
 *
 * @param $node  The node being created or edited. Its properties may be unset
 *               (new node) and timeexecuted may be either a date string or a
 *               year/month/day array.
 * @return       The form array.
 */
function chado_analysis_interpro_form ($node){
   $form = array();
   $form['title']= array(
      '#type' => 'hidden',
      '#default_value' => isset($node->title) ? $node->title : '',
   );

   // Simple text fields: field name => array(label, required). The weight of
   // each element is its 1-based position in this list.
   $text_fields = array(
      'analysisname'   => array(t('Analysis Name'), FALSE),
      'program'        => array(t('Program'), TRUE),
      'programversion' => array(t('Program Version'), TRUE),
      'algorithm'      => array(t('Algorithm'), FALSE),
      'sourcename'     => array(t('Source Name'), FALSE),
      'sourceversion'  => array(t('Source Version'), FALSE),
      'sourceuri'      => array(t('Source URI'), FALSE),
   );
   $weight = 0;
   foreach ($text_fields as $field => $settings) {
      $weight++;
      $form[$field] = array(
         '#type' => 'textfield',
         '#title' => $settings[0],
         '#required' => $settings[1],
         '#default_value' => isset($node->$field) ? $node->$field : '',
         '#weight' => $weight
      );
   }

   // Get time saved in chado
   $default_time = isset($node->timeexecuted) ? $node->timeexecuted : NULL;
   if (is_array($default_time)) {
      // On preview the value is already the year/month/day array submitted by
      // the date element (the node view handles the same case), so the string
      // regexes below must not be applied to it.
      $year  = $default_time['year'];
      $month = $default_time['month'];
      $day   = $default_time['day'];
   }
   elseif ($default_time) {
      // Stored form is "YYYY-MM-DD <time>"; strip leading zeros from month/day.
      $year  = preg_replace("/^(\d+)-\d+-\d+ .*/", "$1", $default_time);
      $month = preg_replace("/^\d+-0?(\d+)-\d+ .*/", "$1", $default_time);
      $day   = preg_replace("/^\d+-\d+-0?(\d+) .*/", "$1", $default_time);
   }
   else {
      // If the time is not set, use current time
      $default_time = time();
      $year  = format_date($default_time, 'custom', 'Y');
      $month = format_date($default_time, 'custom', 'n');
      $day   = format_date($default_time, 'custom', 'j');
   }
   $form['timeexecuted']= array(
      '#type' => 'date',
      '#title' => t('Time Executed'),
      '#required' => TRUE,
      '#default_value' => array(
         'year' => $year,
         'month' => $month,
         'day' => $day,
      ),
      '#weight' => 8
   );

   // The raw description is passed through: the textarea element is escaped
   // when it is rendered, so running check_plain() here as well would show
   // entities such as &amp;amp; to the user and save them back on submit.
   $form['description']= array(
      '#type' => 'textarea',
      '#rows' => 15,
      '#title' => t('Description and/or Program Settings'),
      '#required' => FALSE,
      '#default_value' => isset($node->description) ? $node->description : '',
      '#weight' => 9
   );

   //----InterProScan Settings (Shown only when Tripal Interpro is enabled) ----
   // The settings may arrive as a single "file|parameters" value. Split on the
   // first bar only so that parameters which themselves contain a bar are kept
   // intact.
   if (isset($node->interprofile) && preg_match("/.*\|.*/", $node->interprofile)) {
      $prop_values = explode("|", $node->interprofile, 2);
      $node->interprofile = $prop_values[0];
      $node->interproparameters = $prop_values[1];
   }

   $form['interpro'] = array(
      '#title' => t('Interpro Settings'),
      '#type' => 'fieldset',
      '#description' => t('Specific Settings for Interpro Analysis.'),
      '#collapsible' => TRUE,
      '#attributes' => array('id' => 'interpro-extra-settings'),
      '#weight' => 11
   );
   $form['interpro']['interprofile'] = array(
      '#title' => t('Interproscan Output File (in html format)'),
      '#type' => 'textfield',
      '#description' => t('The html output file generated by Interproscan in full path.'),
      '#default_value' => isset($node->interprofile) ? $node->interprofile : '',
   );
   $form['interpro']['interprojob'] = array(
      '#type' => 'checkbox',
      '#title' => t('Submit a job to parse the Interpro html output'),
      '#description' => t('Note: features associated with the interpro results must '.
                          'exist in chado before parsing the file. Otherwise, interpro '.
                          'results that cannot be linked to a feature will be '.
                          'discarded. Also, Triapl Interpro module needs to be enabled.'),
      '#default_value' => isset($node->interprojob) ? $node->interprojob : NULL,
      '#attributes' => array(
         'onclick' => 'return isSubmittingJob(this)'
      )
   );
   $form['interpro']['parsego'] = array(
      '#type' => 'checkbox',
      '#title' => t('Load GO terms to the database'),
      '#description' => t('Check the box to load GO terms to chado database'),
      '#default_value' => isset($node->parsego) ? $node->parsego : NULL
   );
   $form['interpro']['interproparameters'] = array(
      '#title' => t('Parameters'),
      '#type' => 'textfield',
      '#description' => t('The parameters for running the interpro analysis.'),
      '#default_value' => isset($node->interproparameters) ? $node->interproparameters : '',
   );
   return $form;
}
/*******************************************************************************
 *
 */
function chado_analysis_interpro_insert($node){
   global $user;
   // Create a timestamp so we can insert it into the chado database
   $time = $node->timeexecuted;
   $month = $time['month'];
   $day = $time['day'];
   $year = $time['year'];
   $timestamp = $month.'/'.$day.'/'.$year;

   // If this analysis already exists then don't recreate it in chado
   $analysis_id = $node->analysis_id;
   if ($analysis_id) {
      $sql = "SELECT analysis_id ".
             "FROM {Analysis} ".
             "WHERE analysis_id = %d ";
      $previous_db = tripal_db_set_active('chado');
      $analysis = db_fetch_object(db_query($sql, $node->analysis_id));
      tripal_db_set_active($previous_db);
   }

   // If the analysis doesn't exist then let's create it in chado.
if (empty($analysis)) {
      // empty() rather than a bare negation: $analysis is only assigned when
      // the node already carries an analysis_id, so on a brand new analysis
      // the variable does not exist at this point.

      // First add the item to the chado analysis table
      $sql = "INSERT INTO {analysis} ".
             " (name, description, program, programversion, algorithm, ".
             " sourcename, sourceversion, sourceuri, timeexecuted) ".
             "VALUES ('%s','%s','%s','%s','%s','%s','%s','%s','%s')";
      $previous_db = tripal_db_set_active('chado'); // use chado database
      db_query($sql,$node->analysisname, $node->description,
               $node->program,$node->programversion,$node->algorithm,
               $node->sourcename, $node->sourceversion, $node->sourceuri,
               $timestamp);

      // find the newly entered analysis_id
      $sql = "SELECT analysis_id ".
             "FROM {Analysis} ".
             "WHERE program='%s'".
             "AND programversion='%s'".
             "AND sourcename='%s'";
      $analysis_id = db_result(db_query($sql, $node->program,
                                        $node->programversion, $node->sourcename));

      // Get cvterm_id for 'analysis_interpro_settings'
      $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
             "INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
             "WHERE CVT.name = 'analysis_interpro_settings' ".
             "AND CV.name = 'tripal'";
      $type_id = db_result(db_query($sql));

      // Insert into chado {analysisprop} table
      $sql = "INSERT INTO {analysisprop} (analysis_id, type_id, value) ".
             "VALUES (%d, %d, '%s')";
      $interprosettings = $node->interprofile."|".$node->interproparameters;
      db_query($sql, $analysis_id, $type_id, $interprosettings);
      tripal_db_set_active($previous_db); // switch back to drupal database

      // Add a job if the user wants to parse the html output
      if($node->interprojob) {
         $job_args[0] = $analysis_id;
         $job_args[1] = $node->interprofile;
         if ($node->parsego) {
            $job_args[2] = 1;
         } else {
            $job_args[2] = 0;
         }
         if (is_readable($node->interprofile)) {
            $fname = preg_replace("/.*\/(.*)/", "$1", $node->interprofile);
            tripal_add_job("Parse interpro: $fname",'tripal_analysis_interpro',
                           'tripal_analysis_interpro_parseHTMLFile', $job_args, $user->uid);
         } else {
            drupal_set_message("Can not open interpro output file. Job not scheduled.");
         }
      }
   }

   // Make sure the entry for this analysis doesn't already exist in the
   // chado_analysis table if it doesn't exist then we want to add it.
   $node_check_sql = "SELECT * FROM {chado_analysis} ".
                     "WHERE analysis_id = %d";
   $node_check = db_fetch_object(db_query($node_check_sql, $analysis_id));
   if(!$node_check){
      // next add the item to the drupal table
      $sql = "INSERT INTO {chado_analysis} (nid, vid, analysis_id) ".
             "VALUES (%d, %d, %d)";
      db_query($sql,$node->nid,$node->vid,$analysis_id);
      // Create a title for the analysis node using the unique keys so when the
      // node is saved, it will have a title
      $record = new stdClass();
      // If the analysis has a name, use it as the node title. If not, construct
      // the title using program, programversion, and sourcename
      if ($node->analysisname) {
         $record->title = $node->analysisname;
      } else {
         //Construct node title as "program (version)
         $record->title = "$node->program ($node->programversion)";
      }
      $record->nid = $node->nid;
      drupal_write_record('node',$record,'nid');
      drupal_write_record('node_revisions',$record,'nid');
   }
}
/*******************************************************************************
 * Delete interpro analysis
 *
 * Removes the Drupal-side rows for this node revision ({chado_analysis},
 * {node}, {node_revisions}) and then the linked chado records
 * (analysisfeatureprop, analysisfeature, analysisprop, analysis).
 *
 * @param $node  The node being deleted; nid and vid are read.
 */
function chado_analysis_interpro_delete($node){
   // Before removing, get analysis_id so we can remove it from chado database
   // later
   $sql_drupal = "SELECT analysis_id ".
                 "FROM {chado_analysis} ".
                 "WHERE nid = %d ".
                 "AND vid = %d";
   $analysis_id = db_result(db_query($sql_drupal, $node->nid, $node->vid));

   // Remove data from the {chado_analysis}, {node}, and {node_revisions} tables
   foreach (array('chado_analysis', 'node', 'node_revisions') as $table) {
      $sql_del = "DELETE FROM {" . $table . "} ".
                 "WHERE nid = %d ".
                 "AND vid = %d";
      db_query($sql_del, $node->nid, $node->vid);
   }

   // Without a mapping row there is no chado record to clean up; issuing the
   // deletes anyway would only run them against analysis_id 0.
   if (!$analysis_id) {
      return;
   }

   //Remove from analysisfeatureprop, analysisfeature, analysis, and analysisprop tables
   $previous_db = tripal_db_set_active('chado');
   // One statement with a sub-select instead of one DELETE per analysisfeature
   // row; the properties must go before the analysisfeature rows they point at.
   db_query("DELETE FROM {analysisfeatureprop} ".
            "WHERE analysisfeature_id IN ".
            " (SELECT analysisfeature_id FROM {analysisfeature} WHERE analysis_id = %d)",
            $analysis_id);
   db_query("DELETE FROM {analysisfeature} WHERE analysis_id = %d", $analysis_id);
   db_query("DELETE FROM {analysisprop} WHERE analysis_id = %d", $analysis_id);
   db_query("DELETE FROM {analysis} WHERE analysis_id = %d", $analysis_id);
   tripal_db_set_active($previous_db);
}
/*******************************************************************************
 * Update interpro analysis
 */
function chado_analysis_interpro_update($node){
   global $user;
   if($node->revision){
      // TODO -- decide what to do about revisions
   } else {
      // Create a timestamp so we can insert it into the chado database
      $time = $node->timeexecuted;
      $month = $time['month'];
      $day = $time['day'];
      $year = $time['year'];
      $timestamp = $month.'/'.$day.'/'.$year;

      // get the analysis_id for this node:
      $sql = "SELECT analysis_id ".
             "FROM {chado_analysis} ".
             "WHERE vid = %d";
      $analysis_id = db_fetch_object(db_query($sql, $node->vid))->analysis_id;

      $sql = "UPDATE {analysis} ".
             "SET name = '%s', ".
             " description = '%s', ".
             " program = '%s', ".
             " programversion = '%s', ".
             " algorithm = '%s', ".
             " sourcename = '%s', ".
             " sourceversion = '%s', ".
             " sourceuri = '%s', ".
             " timeexecuted = '%s' ".
             "WHERE analysis_id = %d ";

      $previous_db = tripal_db_set_active('chado'); // use chado database
      db_query($sql, $node->analysisname, $node->description, $node->program,
               $node->programversion,$node->algorithm,$node->sourcename,
               $node->sourceversion, $node->sourceuri, $timestamp, $analysis_id);

      // Get cvterm_id for 'analysis_interpro_settings'
      $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
             "INNER JOIN cv CV ON CV.cv_id = CVT.cv_id ".
             "WHERE CVT.name = 'analysis_interpro_settings' ".
             "AND CV.name = 'tripal'";
      $type_id = db_result(db_query($sql));

      $sql = "UPDATE {analysisprop} ".
             "SET value = '%s' ".
             "WHERE analysis_id = %d AND type_id = %d";
      $interprosettings = $node->interprofile."|".$node->interproparameters;
      db_query($sql, $interprosettings, $analysis_id, $type_id);
      tripal_db_set_active($previous_db); // switch back to drupal database

      // Add a job if the user wants to parse the html output
      if($node->interprojob) {
         $job_args[0] = $analysis_id;
         $job_args[1] = $node->interprofile;
         if ($node->parsego) {
            $job_args[2] = 1;
         } else {
            $job_args[2] = 0;
         }
         if (is_readable($node->interprofile)) {
            $fname = preg_replace("/.*\/(.*)/", "$1", $node->interprofile);
            tripal_add_job("Parse interpro: $fname",'tripal_analysis_interpro',
                           'tripal_analysis_interpro_parseHTMLFile', $job_args, $user->uid);
         } else {
            drupal_set_message("Can not open interpro output file. Job not scheduled.");
         }
      }

      // Create a title for the analysis node using the unique keys so when the
      // node is saved, it will have a title
      $record = new stdClass();
      // If the analysis has a name, use it as the node title.
// If not, construct
      // the title using program, programversion, and sourcename
      if ($node->analysisname) {
         $record->title = $node->analysisname;
      } else {
         //Construct node title as "program (version)
         $record->title = "$node->program ($node->programversion)";
      }
      $record->nid = $node->nid;
      drupal_write_record('node',$record,'nid');
      drupal_write_record('node_revisions',$record,'nid');
   }
}
/*******************************************************************************
 * When a node is requested by the user this function is called to allow us
 * to add auxiliary data to the node object.
 *
 * Looks up the analysis_id mapped to the node revision in {chado_analysis},
 * reads the matching chado analysis record and its
 * 'analysis_interpro_settings' property, and returns them as extra node
 * properties together with a computed title.
 *
 * @param $node  The node being loaded; only vid is read.
 * @return       Object holding the analysis columns (name exposed as
 *               analysisname), interprofile, interproparameters and title.
 *               When no mapping or analysis record exists only the title
 *               (built from empty values) is set.
 */
function chado_analysis_interpro_load($node){
   // get the analysis_id for this node:
   $sql = "SELECT analysis_id FROM {chado_analysis} WHERE vid = %d";
   $ana_node = db_fetch_object(db_query($sql, $node->vid));
   $additions = new stdClass();

   if ($ana_node) {
      // get analysis information
      // The id is bound through a %d placeholder instead of being interpolated
      // into the SQL text.
      $sql = "SELECT Analysis_id, name AS analysisname, description, program, ".
             " programversion, algorithm, sourcename, sourceversion, ".
             " sourceuri, timeexecuted ".
             "FROM {Analysis} ".
             "WHERE Analysis_id = %d";
      $previous_db = tripal_db_set_active('chado'); // use chado database
      $analysis = db_fetch_object(db_query($sql, $ana_node->analysis_id));
      if ($analysis) {
         // Keep the empty object when the chado record is missing so the
         // property assignments below never target FALSE.
         $additions = $analysis;
      }

      // get cvterm_id for 'analysis_interpro_settings'
      $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
             "INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
             "WHERE CVT.name = 'analysis_interpro_settings' ".
             "AND CV.name = 'tripal'";
      $type_id = db_result(db_query($sql));

      // get analysisprop information
      $sql = "SELECT value FROM {analysisprop} ".
             "WHERE analysis_id = %d ".
             "AND type_id = %d";
      $analysisprop = db_result(db_query($sql, $ana_node->analysis_id, $type_id));

      // The value is stored as "file|parameters". A limit of 2 splits on the
      // first bar only; the previous limit of 1 never split at all, leaving the
      // combined string in interprofile and the parameters undefined.
      $prop_values = explode("|", $analysisprop, 2);
      $additions->interprofile = $prop_values[0];
      $additions->interproparameters = isset($prop_values[1]) ? $prop_values[1] : '';
      tripal_db_set_active($previous_db); // now use drupal database
   }

   // If the analysis has a name, use it as the node title. If not, construct
   // the title using program and programversion
   if (!empty($additions->analysisname)) {
      $additions->title = $additions->analysisname;
   } else {
      // Construct node title as "program (version)"
      $program = isset($additions->program) ? $additions->program : '';
      $programversion = isset($additions->programversion) ? $additions->programversion : '';
      $additions->title = "$program ($programversion)";
   }
   return $additions;
}
/*******************************************************************************
 * This function customizes the view of the chado_analysis node. It allows
 * us to generate the markup.
 *
 * Only the full (non-teaser) view is altered: the node is run through
 * node_prepare(), an array-valued timeexecuted is flattened to
 * "year-month-day", and a combined "file|parameters" value in interprofile is
 * split into interprofile and interproparameters.
 *
 * @param $node    The node to display.
 * @param $teaser  TRUE when a teaser is requested; the node is then returned
 *                 untouched.
 * @param $page    Unused.
 * @return         The node object.
 */
function chado_analysis_interpro_view ($node, $teaser = FALSE, $page = FALSE) {
   // use drupal's default node view:
   if (!$teaser) {
      $node = node_prepare($node, $teaser);

      // When previewing a node submitting form, it shows 'Array' instead of
      // correct date format. We need to format the date here
      $time = $node->timeexecuted;
      if(is_array($time)){
         $month = $time['month'];
         $day = $time['day'];
         $year = $time['year'];
         $timestamp = $year.'-'.$month.'-'.$day;
         $node->timeexecuted = $timestamp;
      }

      // When viewing a node, we need to reformat the analysisprop since we
      // separate each value with a bar |. Split on the first bar only so that
      // parameters containing a bar are not truncated.
      if (preg_match("/.*\|.*/",$node->interprofile)) {
         $prop_values = explode("|", $node->interprofile, 2);
         $node->interprofile = $prop_values[0];
         $node->interproparameters = $prop_values[1];
      }
   }
   return $node;
}
/*******************************************************************************
 * Parse Interpro HTML Output file into analysisfeatureprop table
 */
function tripal_analysis_interpro_parseHTMLFile ($analysis_id, $interprofile, $parsego, $job_id) {

   // Prepare log
   $filename = preg_replace("/.*\/(.*)/", "$1", $interprofile);
   $logfile = file_directory_path() . "/tripal/tripal_analysis_interpro/load_$filename.log";
   $log = fopen($logfile, 'a'); // append parsing results to log file

   // Parsing started
   print "Parsing File:".$interprofile." ...\n";
   fwrite($log, date("D M j G:i:s Y").". 
Loading $interprofile\n"); // Get cvterm_id for 'analysis_interpro_output_iteration_hits' which is required // for inserting into the analysisfeatureprop table $previous_db = tripal_db_set_active('chado'); // use chado database $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ". "INNER JOIN cv ON cv.cv_id = CVT.cv_id ". "WHERE CVT.name = 'analysis_interpro_output_hit' ". "AND CV.name = 'tripal'"; $type_id = db_result(db_query($sql)); print "cvterm_id for analysis_interpro_output_iteration_hits is $type_id\n"; // Load the HTML file and convert it into XML for loading $dom = new domDocument; $dom->loadHTMLFile($interprofile); $xml = $dom->saveXML(); $interproput = simplexml_load_string($xml); // Get html tables for parsing $tables = $interproput->children()->children(); // Count the number of tables to be processed $no_iterations = 0; foreach($tables as $tmp) { if ($tmp->getName() == 'table') { $no_iterations ++; } } print "$no_iterations html tables to be processed.\n"; $interval = intval($no_iterations * 0.01); $idx_iterations = 0; // Processed the tables foreach ($tables as $table) { //if (preg_match('/No hits reported/', $table->asXML()) ) { //print "skipping this table b/c no hits are reported\n"; //} // make sure we are looking at a table and its not an empty table if ($table->getName() == 'table' && !preg_match('/No hits reported/', $table->asXML()) ) { $idx_iterations ++; if ($idx_iterations % $interval == 0) { $percentage = (int) ($idx_iterations / $no_iterations * 100); tripal_db_set_active($previous_db); tripal_job_set_progress($job_id, $percentage); $previous_db = tripal_db_set_active('chado'); print $percentage."% "; } // Set job status // Get the first row and match its name with the feature name $firsttd = $table->children()->children()->children(); $feature_id = 0; foreach($firsttd as $b) { foreach($b->children() as $a) { if ($a->getName() == 'a') { // Remove _ORF from the sequence name $seqname = preg_replace('/^(.+?)_\d_.+/', "$1", $a); print "seqname 
is $seqname\n"; // Find out how many features match this uniquename $sql = "SELECT count(feature_id) FROM {feature} ". "WHERE uniquename = '%s' "; $no_features = db_result(db_query($sql, $seqname)); // If there is only one match, get the feature_id if ($no_features == 1) { $sql = "SELECT feature_id FROM {feature} ". "WHERE uniquename = '%s' "; $feature_id = db_result(db_query($sql, $seqname)); print "\tfeature id is $feature_id\n"; // If the uniquename matches more than one features then skip and print 'Ambiguous' } else if ($no_features > 1) { fwrite($log, "Ambiguous: ".$seqname." matches more than one feature and is not processed.\n"); continue; // If the uniquename did not match, skip and print 'Failed' } else { fwrite($log, "Failed: ".$seqname."\n"); } } } } // Successfully matched. print 'Succeeded'. Add analysis_id and // feature_id to analysisfeature. Add the table as XML to analysisfeatureprop if ($feature_id) { //------------------------------------ // Clease unwanted rows from the table //------------------------------------ $parent_row = "/