t('Analysis: KEGG'),
    'module' => 'chado_analysis_kegg',
    'description' => t('Results from a KEGG/KAAS analysis'),
    'has_title' => FALSE,
    'title_label' => t('Analysis: KEGG'),
    'has_body' => FALSE,
    'body_label' => t('KEGG Analysis Description'),
    'locked' => TRUE
  );
  return $nodes;
}

/*******************************************************************************
 * Declares the extra page callbacks needed by this module.
 *
 * Menu items for the node types created by this module are added to the
 * 'Create Content' navigation menu automatically; this function only adds the
 * remaining callback paths.
 *
 * @return
 *   An array of menu items keyed by path. 'brite/%' is served by
 *   tripal_analysis_kegg_brite() and 'tripal_analysis_kegg_org_report/%' by
 *   tripal_analysis_kegg_org_report(). Both are of type MENU_CALLBACK.
 */
function tripal_analysis_kegg_menu() {
  // BRITE hierarchy listing / tree callback.
  $brite_item = array(
    'title' => t('KEGG BRITE'),
    'page callback' => 'tripal_analysis_kegg_brite',
    'page arguments' => array(1, 2),
    'access arguments' => array('access content'),
    'type' => MENU_CALLBACK
  );

  // Organism-level KEGG report callback.
  // NOTE(review): the 'path' entry looks like a leftover from an older menu
  // API -- confirm whether anything still reads it before removing it.
  $org_report_item = array(
    'path' => 'tripal_analysis_kegg_org_report',
    'title' => t('Analysis KEGG report'),
    'page callback' => 'tripal_analysis_kegg_org_report',
    'page arguments' => array(1),
    'access arguments' => array('access chado_analysis_kegg content'),
    'type' => MENU_CALLBACK
  );

  return array(
    'brite/%' => $brite_item,
    'tripal_analysis_kegg_org_report/%' => $org_report_item,
  );
}

/*******************************************************************************
 * Set the permission types that the chado module uses.
* Essentially we want permissions that protect creation, editing and deleting
* of chado data objects.
*
* @return
*   The list of permission names defined by this module.
*/
function tripal_analysis_kegg_perm() {
  return array(
    'access chado_analysis_kegg content',
    'create chado_analysis_kegg content',
    'delete chado_analysis_kegg content',
    'edit chado_analysis_kegg content',
  );
}

/*******************************************************************************
 * Provides access control for users trying to perform actions on data
 * managed by this module.
 *
 * @param $op
 *   The operation being checked: 'create', 'update', 'delete' or 'view'.
 * @param $node
 *   The node the operation applies to (not inspected here).
 * @param $account
 *   The user account whose permissions are checked.
 *
 * @return
 *   TRUE when the account holds the permission matching $op, FALSE otherwise
 *   (including for any unrecognised operation).
 */
function chado_analysis_kegg_access($op, $node, $account) {
  // Operation => permission that grants it.
  $required = array(
    'create' => 'create chado_analysis_kegg content',
    'update' => 'edit chado_analysis_kegg content',
    'delete' => 'delete chado_analysis_kegg content',
    'view' => 'access chado_analysis_kegg content',
  );

  if (!is_string($op) || !isset($required[$op])) {
    return FALSE;
  }
  return user_access($required[$op], $account) ? TRUE : FALSE;
}

/*******************************************************************************
 * Builds the KEGG BRITE output for an analysis.
 *
 * When $ajax is empty the function returns a listing of the BRITE terms that
 * have properties stored for the analysis. When $ajax is set it looks up the
 * stored hierarchy for the given term and emits it with drupal_json().
 *
 * @param $analysis_id
 *   The analysis whose BRITE data is shown.
 * @param $type_id
 *   The cvterm_id of the BRITE term (only used for the AJAX response).
 * @param $ajax
 *   Non-empty when the request is an AJAX refresh of the hierarchy tree.
 *
 * @return
 *   The listing markup for a normal request; nothing for an AJAX request
 *   (the response is sent through drupal_json()).
 */
function tripal_analysis_kegg_brite($analysis_id, $type_id, $ajax) {
  // Start from an empty string so the appends below never operate on an
  // undefined variable.
  $content = '';

  if (!$ajax) {
    // NOTE(review): the literal text in the strings of this function looks as
    // if its HTML markup was lost at some point -- compare with the version in
    // source control before relying on the generated output.
    $content .= "\nKEGG BRITE Hierarchy:\n";

    // List all BRITE terms on the left
    $sql = "SELECT DISTINCT CVT.name, CVT.cvterm_id FROM {cvterm} CVT "
         . "INNER JOIN analysisprop AP ON CVT.cvterm_id = AP.type_id "
         . "WHERE AP.analysis_id = %d AND CVT.definition LIKE 'KEGG BRITE term: %' "
         . "ORDER BY CVT.cvterm_id";
    $previous_db = tripal_db_set_active('chado');
    $result = db_query($sql, $analysis_id);
    tripal_db_set_active($previous_db);

    while ($brite_term = db_fetch_object($result)) {
      $url = url("brite/$analysis_id/$brite_term->cvterm_id/1");
      $content .= "\n  • cvterm_id)\" href=\"$url\"> $brite_term->name\n  • ";
    }

    // Show the hierarchy tree
    $content .= "\n    ";
    $content .= "Note: Click a BRITE term for its functional hierarchy";
    $content .= "\n    ";
  }
  else {
    // Called by ajax: generate the tree structure.
    $previous_db = tripal_db_set_active('chado');

    // Get BRITE term from cvterm table
    $sql = 'SELECT name FROM {cvterm} WHERE cvterm_id=%d';
    $brite_term = db_result(db_query($sql, $type_id));

    // Get BRITE hierarchy tree
    $sql = "SELECT value FROM {analysisprop} AP "
         . "INNER JOIN CVterm CVT on AP.type_id = CVT.cvterm_id "
         . "INNER JOIN CV on CVT.cv_id = CV.cv_id "
         . "WHERE CV.name = 'tripal' and CVT.name = '%s' AND AP.analysis_id = %d";
    $result = db_fetch_object(db_query($sql, $brite_term, $analysis_id));
    tripal_db_set_active($previous_db);

    // No stored hierarchy yields an empty tree instead of a property access
    // on a non-object.
    $tree = $result ? $result->value : '';
    $content .= "\n    $tree\n    ";
  }

  // since this function provides output for addition into
  // an analysis page, as well as an AJAX refresh of content
  // within the BRITE hierarchy we need to setup the return
  // different depending on the request type
  if ($ajax) {
    drupal_json(array(
      'update' => $content,
      'id' => "tripal_kegg_brite_tree_$type_id",
      'brite_term' => "Hierarchy: $brite_term",
    ));
  }
  else {
    return $content;
  }
}

/*******************************************************************************
 * Provide a KEGG Analysis form
 *
 * Extends the form returned by chado_analysis_form() with a 'kegg' fieldset
 * holding the KEGG specific settings.
 *
 * @param $node
 *   The node being created or edited. Previously saved settings are read from
 *   $node->analysis->tripal_analysis_kegg when present.
 *
 * @return
 *   The form array.
 */
function chado_analysis_kegg_form ($node) {
  // add in the default fields
  $form = chado_analysis_form($node);

  // set the defaults; a node that is being created has no stored settings yet
  $kegg = isset($node->analysis->tripal_analysis_kegg) ? $node->analysis->tripal_analysis_kegg : NULL;
  $query_re = isset($kegg->query_re) ? $kegg->query_re : NULL;
  $query_type = isset($kegg->query_type) ? $kegg->query_type : NULL;
  $query_uniquename = isset($kegg->query_uniquename) ? $kegg->query_uniquename : NULL;
  $hierfile = isset($kegg->hierfile) ? $kegg->hierfile : NULL;

  $form['kegg'] = array(
    '#title' => t('KEGG Settings'),
    '#type' => 'fieldset',
    '#description' => t('Specific Settings for KEGG Analysis.'),
    '#collapsible' => TRUE,
    '#attributes' => array('id' => 'kegg-extra-settings'),
    '#weight' => 11
  );
  $form['kegg']['hierfile'] = array(
    '#title' => t('KAAS hier.tar.gz Output File'),
    '#type' => 'textfield',
    '#description' => t('The full path to the hier.tar.gz file generated by KAAS. Alternatively, you can input the full path to the directory that contains decompressed kegg files.'),
    '#default_value' => $hierfile,
  );
  $form['kegg']['query_re'] = array(
    '#title' => t('Query Name RE'),
    '#type' => 'textfield',
    '#description' => t('Enter the regular expression that will extract the '.
       'feature name from the results line in the KEGG heir results. This will be '.
       'the same as the definition line in the query FASTA file used for the analysis. This option '.
       'is only required when the query does not identically match a feature '.
       'in the database.'),
    '#default_value' => $query_re,
  );
  $form['kegg']['query_type'] = array(
    '#title' => t('Query Type'),
    '#type' => 'textfield',
    '#description' => t('Please enter the Sequence Ontology term that describes '.
       'the query sequences used for KEGG. This is only necessary if two '.
       'or more sequences have the same name.'),
    '#default_value' => $query_type,
  );
  $form['kegg']['query_uniquename'] = array(
    '#title' => t('Use Unique Name'),
    '#type' => 'checkbox',
    '#description' => t('Select this checkbox if the feature name in the KEGG heir file '.
       'matches the uniquename in the database. By default, the feature will '.
       'be mapped to the "name" of the feature.'),
    '#default_value' => $query_uniquename,
  );
  $form['kegg']['keggjob'] = array(
    '#type' => 'checkbox',
    '#title' => t('Submit a job to parse the kegg output into analysisfeatureprop table'),
    '#description' => t('Note: features associated with the KAAS results must '.
       'exist in chado before parsing the file. Otherwise, KEGG '.
       'results that cannot be linked to a feature will be '.
       'discarded.'),
  );
  return $form;
}

/*******************************************************************************
 * Inserts a new KEGG analysis together with its KEGG specific properties.
 *
 * @param $node
 *   The node being inserted; analysis_id, hierfile, query_re, query_type and
 *   query_uniquename are read from it.
 */
function chado_analysis_kegg_insert($node) {
  // insert the analysis
  chado_analysis_insert($node);

  // set the type for this analysis
  tripal_analysis_insert_property($node->analysis_id, 'analysis_type', 'tripal_analysis_kegg');

  // now add in the remaining settings, each one as its own property
  tripal_analysis_insert_property($node->analysis_id, 'analysis_kegg_settings', $node->hierfile);
  tripal_analysis_insert_property($node->analysis_id, 'analysis_kegg_query_re', $node->query_re);
  tripal_analysis_insert_property($node->analysis_id, 'analysis_kegg_query_type', $node->query_type);
  tripal_analysis_insert_property($node->analysis_id, 'analysis_kegg_query_uniquename', $node->query_uniquename);

  // Add a job if the user wants to parse the html output
  chado_analysis_kegg_submit_job($node);
}

/**
 * Schedules a Tripal job that parses the KAAS output when requested.
 *
 * Nothing happens unless $node->keggjob is set. When the configured file or
 * directory is not readable a message is shown and no job is scheduled.
 *
 * @param $node
 *   The analysis node carrying the submitted KEGG settings.
 */
function chado_analysis_kegg_submit_job($node) {
  global $user;

  // keggjob is only present when the node comes from the node form.
  if (empty($node->keggjob)) {
    return;
  }

  // Argument order matches the parameters of
  // tripal_analysis_kegg_parseHierFile().
  $job_args = array(
    0 => $node->analysis_id,
    1 => $node->hierfile,
    2 => base_path(),
    3 => $node->query_re,
    4 => $node->query_type,
    5 => $node->query_uniquename,
  );

  if (is_readable($node->hierfile)) {
    // Use only the last path component in the job name.
    $fname = preg_replace("/.*\/(.*)/", "$1", $node->hierfile);
    tripal_add_job("Parse KAAS output: $fname", 'tripal_analysis_kegg',
      'tripal_analysis_kegg_parseHierFile', $job_args, $user->uid);
  }
  else {
    drupal_set_message("Can not open KAAS hier.tar.gz output file. \nJob not scheduled.");
  }
}

/*******************************************************************************
 * Delete KEGG analysis
 *
 * @param $node
 *   The node being deleted; handed to chado_analysis_delete().
 */
function chado_analysis_kegg_delete($node) {
  chado_analysis_delete($node);
}

/*******************************************************************************
 * Update KEGG analysis
 *
 * @param $node
 *   The node being updated; analysis_id, hierfile, query_re, query_type and
 *   query_uniquename are read from it.
 */
function chado_analysis_kegg_update($node) {
  // update the analysis
  chado_analysis_update($node);

  // set the type for this analysis
  tripal_analysis_update_property($node->analysis_id, 'analysis_type', 'tripal_analysis_kegg', 1);

  // now update the remaining settings, each one as its own property
  tripal_analysis_update_property($node->analysis_id, 'analysis_kegg_settings', $node->hierfile, 1);
  tripal_analysis_update_property($node->analysis_id, 'analysis_kegg_query_re', $node->query_re, 1);
  tripal_analysis_update_property($node->analysis_id, 'analysis_kegg_query_type', $node->query_type, 1);
  tripal_analysis_update_property($node->analysis_id, 'analysis_kegg_query_uniquename', $node->query_uniquename, 1);

  // Add a job if the user wants to parse the html output
  chado_analysis_kegg_submit_job($node);
}

/*******************************************************************************
 * When a node is requested by the user this function is called to allow us
 * to add auxiliary data to the node object.
*
* The default analysis fields come from chado_analysis_load(); the KEGG
* settings (hierfile, query_re, query_type, query_uniquename) are read from the
* analysis properties and attached as $analysis->tripal_analysis_kegg.
*/
function chado_analysis_kegg_load($node) {
  // load the default set of analysis fields
  $additions = chado_analysis_load($node);

  // create some variables for easier lookup
  $analysis = $additions->analysis;
  $analysis_id = $analysis->analysis_id;

  // Create the settings container explicitly; assigning properties on an
  // undefined member is a fatal error on current PHP versions.
  if (!isset($analysis->tripal_analysis_kegg) || !is_object($analysis->tripal_analysis_kegg)) {
    $analysis->tripal_analysis_kegg = new stdClass();
  }

  // settings field => analysis property that stores it
  $properties = array(
    'hierfile' => 'analysis_kegg_settings',
    'query_re' => 'analysis_kegg_query_re',
    'query_type' => 'analysis_kegg_query_type',
    'query_uniquename' => 'analysis_kegg_query_uniquename',
  );
  foreach ($properties as $field => $property_name) {
    $property = tripal_analysis_get_property($analysis_id, $property_name);
    // A property that was never stored leaves the field NULL.
    $analysis->tripal_analysis_kegg->$field = isset($property->value) ? $property->value : NULL;
  }

  return $additions;
}

/**
 * Prepares a KEGG analysis node for display.
 *
 * For non-teaser views the node is passed through node_prepare(), and a
 * 'timeexecuted' value that is still an array (as happens when previewing the
 * node form) is flattened to a 'year-month-day' string.
 *
 * @param $node
 *   The node to display.
 * @param $teaser
 *   TRUE when the teaser is being rendered.
 * @param $page
 *   TRUE when the node is rendered on its own page (unused here).
 *
 * @return
 *   The prepared node.
 */
function chado_analysis_kegg_view ($node, $teaser = FALSE, $page = FALSE) {
  if ($teaser) {
    return $node;
  }

  // use drupal's default node view:
  $node = node_prepare($node, $teaser);

  // When previewing a node submitting form, it shows 'Array' instead of
  // correct date format. We need to format the date here
  if (isset($node->timeexecuted) && is_array($node->timeexecuted)) {
    $time = $node->timeexecuted;
    $node->timeexecuted = $time['year'] . '-' . $time['month'] . '-' . $time['day'];
  }
  return $node;
}

/********************************************************************************
 * Tripal job callback: parses the KAAS 'hier' output of an analysis.
 *
 * Every *.keg file found is handed to tripal_analysis_kegg_parse_kegg_file();
 * the content it returns is stored in the analysisprop table under the BRITE
 * term reported for that file.
 *
 * @param $analysis_id
 *   The analysis the results belong to.
 * @param $hierfile
 *   Path to the hier.tar.gz archive, or to a directory of extracted files.
 * @param $base_path
 *   Passed through to tripal_analysis_kegg_parse_kegg_file().
 * @param $query_re
 *   Passed through to tripal_analysis_kegg_parse_kegg_file().
 * @param $query_type
 *   Passed through to tripal_analysis_kegg_parse_kegg_file().
 * @param $query_uniquename
 *   Passed through to tripal_analysis_kegg_parse_kegg_file().
 * @param $job_id
 *   The job whose progress is reported through tripal_job_set_progress().
 */
function tripal_analysis_kegg_parseHierFile ($analysis_id, $hierfile, $base_path,
  $query_re, $query_type, $query_uniquename, $job_id) {

  // If user input a file (e.g. hier.tar.gz), decompress it first
  $extracted = is_file($hierfile);
  if ($extracted) {
    $data_dir = file_directory_path() . "/tripal/tripal_analysis_kegg";
    // Quote both paths for the shell, and only run tar when the cd succeeded
    // so the archive is never unpacked into an unrelated working directory.
    $output = shell_exec("cd " . escapeshellarg($data_dir) . " && tar -zxf " . escapeshellarg($hierfile));
    print "$output\n";
    $hierdir = $data_dir . "/hier";
  }
  else {
    // Otherwise, treat it as a directory
    $hierdir = $hierfile;
  }

  $dir_handle = @opendir($hierdir);
  if (!$dir_handle) {
    die("Unable to open $hierdir");
  }
  $total_files = count(glob($hierdir . '/*.*'));
  print "There are $total_files keg file(s).\n";

  // Report progress roughly every 1% of the files. With fewer than 100 files
  // that would be an interval of 0 (and a modulo by zero), so use at least 1.
  $interval = max(1, intval($total_files * 0.01));
  $no_file = 0;

  // Remove the analysis features for this analysis
  // we will rebuild them from just this parsing
  if (!tripal_core_chado_delete('analysisfeature', array('analysis_id' => $analysis_id))) {
    print "ERROR: Cannot prepare the analysis for adding features\n";
    exit;
  }

  // Looks up the cvterm_id of a term in the 'tripal' vocabulary.
  $cvterm_sql = "SELECT cvterm_id FROM {cvterm} CVT INNER JOIN CV ON CVT.cv_id = CV.cv_id "
              . "WHERE CV.name = 'tripal' AND CVT.name = '%s'";

  // Set (by reference) in tripal_analysis_kegg_parse_kegg_file().
  $type = NULL;

  // Compare against FALSE explicitly so an entry named "0" does not end the
  // loop early.
  while (FALSE !== ($file = readdir($dir_handle))) {
    if (!preg_match("/^.*\.keg/", $file)) {
      continue;
    }

    // Update the progress
    if ($no_file % $interval == 0) {
      $percentage = $total_files > 0 ? (int) ($no_file / $total_files * 100) : 0;
      tripal_job_set_progress($job_id, $percentage);
      print $percentage . "% ";
    }
    $no_file++;

    $content = tripal_analysis_kegg_parse_kegg_file("$hierdir/$file", $type,
      $analysis_id, $base_path, $query_re, $query_type, $query_uniquename);
    if (!$content) {
      continue;
    }

    //------------------------------------------------------
    // Insert into analysisprop table
    //------------------------------------------------------
    $previous_db = tripal_db_set_active('chado'); // Use chado database

    // Make sure the same value doesn't exist before inserting into chado
    $sql = "SELECT value FROM {analysisprop} WHERE analysis_id = %d AND type_id = ($cvterm_sql)";
    $oldvalue = db_result(db_query($sql, $analysis_id, $type));

    if ($oldvalue != $content) {
      // Get type_id for the BRITE term
      $brite_cvterm_id = db_result(db_query($cvterm_sql, $type));
      if ($brite_cvterm_id) {
        // Bind the value through a '%s' placeholder so db_query() escapes it;
        // interpolating it into the SQL text broke on single quotes and let
        // '%' sequences in the content be treated as placeholders.
        // NOTE(review): when a different value is already stored for this
        // term this still issues an INSERT rather than an UPDATE -- confirm
        // that is intended.
        $sql = "INSERT INTO {analysisprop} (analysis_id, type_id, value) VALUES (%d, %d, '%s')";
        db_query($sql, $analysis_id, $brite_cvterm_id, $content);
      }
      else {
        print "ERROR: Cannot find the cvterm for BRITE term '$type'; skipping $file\n";
      }
    }

    tripal_db_set_active($previous_db); // Use drupal database
  }
  print "Done.\n";
  closedir($dir_handle);

  // If user input a file, remove decompressed files after parsing
  if ($extracted) {
    $output = shell_exec("rm -r " . escapeshellarg($hierdir));
    print "$output\n";
  }
}

/*******************************************************************************
 * Parse *.keg files.
 * Each file has a definition line. BRITE term is extracted
 * from this line
 * and added to chado as a cvterm. Tree structure for this cvterm is then
 * generated and saved to analysisfeature and analysisfeatureprop tables.
 */
function tripal_analysis_kegg_parse_kegg_file ($file,&$type,$analysis_id, $base_path, $query_re,$query_type,$query_uniquename){

  $handle = fopen($file,'r');
  $depth = array();
  $current = '@'; # this is one character below 'A' in the ASCII table
  $prev = '@';
  $id = 0;
  $type_id = 0;
  $no_line = 0;
  $ul_content = array();
  $has_feature = 0;
  $level = -1;
  while($line = fgets($handle)){
    $no_line ++;
    // Find out what kind of file we're looking at.
if(preg_match("/#.*nbsp;\s(.*)<\/h2>$/",$line,$matches)){ // Set type as the matched term in the DEFINITION line and add it as a new cvterm $type = $matches[1]; // Before inserting, make sure this cvterm doesn't exist $previous_db = tripal_db_set_active('chado'); $sql = "SELECT cvterm_id FROM {cvterm} CVT INNER JOIN cv ON cv.cv_id = CVT.cv_id WHERE cv.name = 'tripal' AND CVT.name = '%s'"; $type_id = db_result(db_query($sql, $type)); tripal_db_set_active($previous_db); if (!$type_id) { tripal_add_cvterms($type, "KEGG BRITE term: $type"); // Get newly added type_id $sql = "SELECT cvterm_id FROM {cvterm} CVT INNER JOIN cv ON cv.cv_id = CVT.cv_id WHERE cv.name = 'tripal' AND CVT.name = '%s'"; $previous_db = tripal_db_set_active('chado'); $type_id = db_result(db_query($sql, $type)); tripal_db_set_active($previous_db); } } // get the depth of the hierarch (e.g. A,B,C or D); preg_match("/^([ABCDEFGHIJKLMNOP])\s*(.*)/",$line,$matches); // skip lines that aren't data or are empty if(!$matches[1] or !$matches[2]){continue;} // set the current level and keep track of the previous level (e.g. A,B,C...) $prev = $current; $current = $matches[1]; // if we have matches and we are going down to a lower level then we // want to close off each \n"; $content .= $ul_content[$level]; $ul_content[$level--] = ''; } $has_feature = 0; } else { for($i = (ord($current) - ord($prev)); $i < 0; $i++){ $ul_content[$level--] = ''; } } // if we've gone up a level then add a new