4 лет назад · f138256959
--- a/abundance_estimates_to_matrix.pl
+++ b/abundance_estimates_to_matrix.pl
@@ -0,0 +1,330 @@
 
															+#!/usr/bin/env perl
														
 
															+
														
 
															+use strict;
														
 
															+use warnings;
														
 
															+use FindBin;
														
 
															+use File::Basename;
														
 
															+use Getopt::Long qw(:config no_ignore_case bundling pass_through);
														
 
															+
														
 
															+my $usage = <<__EOUSAGE__;
														
 
															+
														
 
															+############################################################
														
 
															+#
														
 
															+# Usage:  $0 --est_method <method>  sample1.results sample2.results ...
														
 
															+#
														
 
															+#      or  $0 --est_method <method> --quant_files file.listing_target_files.txt
														
 
															+#
														
 
															+#      Note, if only a single input file is given, it's expected to contain the paths to all the target abundance estimation files.
														
 
															+#
														
 
															+# Required:
														
 
															+#            
														
 
															+#  --est_method <string>           featureCounts|RSEM|eXpress|kallisto|salmon  (needs to know what format to expect)
														
 
															+#
														
 
															+# Options:
														
 
															+#
														
 
															+#  --cross_sample_norm <string>         TMM|UpperQuartile|none   (default: TMM)
														
 
															+#
														
 
															+#  --name_sample_by_basedir             name sample column by dirname instead of filename
														
 
															+#      --basedir_index <int>            default(-2)
														
 
															+#
														
 
															+#  --out_prefix <string>                default: 'matrix'
														
 
															+#
														
 
															+#  --quant_files <string>              file containing a list of all the target files.
														
 
															+#
														
 
															+############################################################
														
 
															+
														
 
															+
														
 
															+__EOUSAGE__
														
 
															+
														
 
															+    ;
														
 
															+
														
 
															+
														
 
															+my $help_flag;
														
 
															+my $est_method;
														
 
															+my $val_type;
														
 
															+my $cross_sample_norm = "TMM";
														
 
															+my $name_sample_by_basedir = 0;
														
 
															+my $out_prefix = "matrix";
														
 
															+my $basedir_index = -2;
														
 
															+my $quant_files = "";
														
 
															+
														
 
															+&GetOptions('help|h' => \$help_flag,
														
 
															+            'est_method=s' => \$est_method,
														
 
															+            
														
 
															+            'cross_sample_norm=s' => \$cross_sample_norm,
														
 
															+            'name_sample_by_basedir' => \$name_sample_by_basedir,
														
 
															+            'out_prefix=s' => \$out_prefix,
														
 
															+            'basedir_index=i' => \$basedir_index,
														
 
															+            
														
 
															+            'quant_files=s' => \$quant_files,
														
 
															+            );
														
 
															+
														
 
															+
														
 
															+unless ($est_method && (@ARGV || $quant_files)) {
														
 
															+    die $usage;
														
 
															+}
														
 
															+
														
 
															+unless ($est_method =~ /^(featureCounts|RSEM|eXpress|kallisto|salmon)/i) {
														
 
															+    die "Error, dont recognize --est_method $est_method ";
														
 
															+}
														
 
															+unless ($cross_sample_norm =~ /^(TMM|UpperQuartile|none)$/i) {
														
 
															+    die "Error, dont recognize --cross_sample_norm $cross_sample_norm ";
														
 
															+}
														
 
															+
														
 
															+my @files;
														
 
															+
														
 
															+if ($quant_files) {
														
 
															+    # allow for a file listing the various files.
														
 
															+    @files = `cat $quant_files`;
														
 
															+    chomp @files;
														
 
															+}
														
 
															+elsif (@ARGV) {
														
 
															+    @files = @ARGV;
														
 
															+}
														
 
															+else {
														
 
															+    die $usage;
														
 
															+}
														
 
															+
														
 
															+
														
 
															+
														
 
															+=data_formats
														
 
															+
														
 
															+## featureCounts
														
 
															+
														
 
															+0	gene_id
														
 
															+1	counts
														
 
															+2	fpkm
														
 
															+3	tpm
														
 
															+## RSEM:
														
 
															+
														
 
															+0       transcript_id
														
 
															+1       gene_id
														
 
															+2       length
														
 
															+3       effective_length
														
 
															+4       expected_count
														
 
															+5       TPM
														
 
															+6       FPKM
														
 
															+7       IsoPct
														
 
															+
														
 
															+
														
 
															+## eXpress v1.5:
														
 
															+
														
 
															+1       target_id
														
 
															+2       length
														
 
															+3       eff_length
														
 
															+4       tot_counts
														
 
															+5       uniq_counts
														
 
															+6       est_counts
														
 
															+7       eff_counts
														
 
															+8       ambig_distr_alpha
														
 
															+9       ambig_distr_beta
														
 
															+10      fpkm
														
 
															+11      fpkm_conf_low
														
 
															+12      fpkm_conf_high
														
 
															+13      solvable
														
 
															+14      tpm
														
 
															+
														
 
															+
														
 
															+## kallisto:
														
 
															+0       target_id
														
 
															+1       length
														
 
															+2       eff_length
														
 
															+3       est_counts
														
 
															+4       tpm
														
 
															+
														
 
															+
														
 
															+## salmon:
														
 
															+0       Name
														
 
															+1       Length
														
 
															+2       EffectiveLength
														
 
															+3       TPM
														
 
															+4       NumReads
														
 
															+
														
 
															+=cut
														
 
															+    
														
 
															+    ;
														
 
															+
														
 
															+my ($acc_field, $counts_field, $fpkm_field, $tpm_field);
														
 
															+
														
 
															+if ($est_method =~ /^rsem$/i) {
														
 
															+    $acc_field = 0;
														
 
															+    $counts_field = "expected_count";
														
 
															+    $fpkm_field = "FPKM";
														
 
															+    $tpm_field = "TPM";
														
 
															+}
														
 
															+elsif ($est_method =~ /^express$/i) {  # as of v1.5
														
 
															+    $acc_field = "target_id";
														
 
															+    $counts_field = "eff_counts";
														
 
															+    $fpkm_field = "fpkm";
														
 
															+    $tpm_field = "tpm";
														
 
															+}
														
 
															+elsif ($est_method =~ /^kallisto$/i) {
														
 
															+    $acc_field = "target_id";
														
 
															+    $counts_field = "est_counts";
														
 
															+    $fpkm_field = "tpm";
														
 
															+    $tpm_field = "tpm";
														
 
															+}
														
 
															+elsif ($est_method =~ /^salmon/) {
														
 
															+    $acc_field = "Name";
														
 
															+    $counts_field = "NumReads";
														
 
															+    $fpkm_field = "TPM";
														
 
															+    $tpm_field = "TPM";
														
 
															+}
														
 
															+elsif ($est_method =~ /^featureCounts/) {
														
 
															+    $acc_field = "gene_id";
														
 
															+    $counts_field = "counts";
														
 
															+    $fpkm_field = "fpkm";
														
 
															+    $tpm_field = "tpm";
														
 
															+}
														
 
															+else {
														
 
															+    die "Error, dont recognize --est_method [$est_method] ";
														
 
															+}
														
 
															+
														
 
															+main: {
														
 
															+    
														
 
															+    my %data;
														
 
															+    
														
 
															+    foreach my $file (@files) {
														
 
															+        print STDERR "-reading file: $file\n";
														
 
															+        open (my $fh, $file) or die "Error, cannot open file $file";
														
 
															+        my $header = <$fh>; 
														
 
															+        chomp $header;
														
 
															+        my %fields = &parse_field_positions($header);
														
 
															+        #use Data::Dumper; print STDERR Dumper(\%fields);
														
 
															+        while (<$fh>) {
														
 
															+            chomp;
														
 
															+            
														
 
															+            my @x = split(/\t/);
														
 
															+            my $acc = $x[ $fields{$acc_field} ];
														
 
															+            my $count = $x[ $fields{$counts_field} ];
														
 
															+            my $fpkm = $x[ $fields{$fpkm_field} ];
														
 
															+            my $tpm = $x[ $fields{$tpm_field} ];
														
 
															+
														
 
															+            $data{$acc}->{$file}->{count} = $count;
														
 
															+            $data{$acc}->{$file}->{FPKM} = $fpkm;
														
 
															+            $data{$acc}->{$file}->{TPM} = $tpm;
														
 
															+        }
														
 
															+        close $fh;
														
 
															+    }
														
 
															+    
														
 
															+    my @filenames = @files;
														
 
															+    foreach my $file (@filenames) {
														
 
															+        if ($name_sample_by_basedir) {
														
 
															+            my @path = split(m|/|, $file);
														
 
															+            $file = $path[$basedir_index];
														
 
															+        }
														
 
															+        else {
														
 
															+            $file = basename($file);
														
 
															+        }
														
 
															+    }
														
 
															+    print STDERR "\n\n* Outputting combined matrix.\n\n";
														
 
															+    
														
 
															+    my $counts_matrix_file = "$out_prefix.counts.matrix";
														
 
															+    my $TPM_matrix_file = "$out_prefix.TPM.not_cross_norm";
														
 
															+    open (my $ofh_counts, ">$counts_matrix_file") or die "Error, cannot write file $counts_matrix_file";
														
 
															+    open (my $ofh_TPM, ">$TPM_matrix_file") or die "Error, cannot write file $TPM_matrix_file";
														
 
															+
														
 
															+    { # check to see if they're unique
														
 
															+        my %filename_map = map { + $_ => 1 } @filenames;
														
 
															+        if (scalar keys %filename_map != scalar @filenames) {
														
 
															+            die "Error, the column headings: @filenames are not unique.  Should you consider using the --name_sample_by_basedir parameter?";
														
 
															+        }
														
 
															+    }
														
 
															+    
														
 
															+
														
 
															+    # clean up matrix headers
														
 
															+    foreach my $file (@filenames) {
														
 
															+        # also, get rid of the part of the filename that RSEM adds
														
 
															+        $file =~ s/\.(genes|isoforms)\.results$//;
														
 
															+	# featureCounts
														
 
															+        $file =~ s/\.count$//;
														
 
															+    }
														
 
															+    
														
 
															+
														
 
															+    print $ofh_counts join("\t", "", @filenames) . "\n";
														
 
															+    print $ofh_TPM join("\t", "", @filenames) . "\n";
														
 
															+
														
 
															+    foreach my $acc (keys %data) {
														
 
															+        
														
 
															+        print $ofh_counts "$acc";
														
 
															+        print $ofh_TPM "$acc";
														
 
															+        
														
 
															+        foreach my $file (@files) {
														
 
															+
														
 
															+            my $count = $data{$acc}->{$file}->{count};
														
 
															+            unless (defined $count) {
														
 
															+                $count = "NA";
														
 
															+            }
														
 
															+            my $tpm = $data{$acc}->{$file}->{TPM};
														
 
															+            if (defined $tpm) {
														
 
															+                $tpm = $tpm/1;
														
 
															+            }
														
 
															+            else {
														
 
															+                $tpm = "NA";
														
 
															+            }
														
 
															+
														
 
															+            print $ofh_counts "\t$count";
														
 
															+            print $ofh_TPM "\t$tpm";
														
 
															+        }
														
 
															+        
														
 
															+        print $ofh_counts "\n";
														
 
															+        print $ofh_TPM "\n";
														
 
															+
														
 
															+    }
														
 
															+    close $ofh_counts;
														
 
															+    close $ofh_TPM;
														
 
															+    
														
 
															+
														
 
															+    if (scalar @files > 1) {
														
 
															+        ## more than one sample 
														
 
															+        
														
 
															+        if ($cross_sample_norm =~ /^TMM$/i) {
														
 
															+            my $cmd = "$FindBin::RealBin/support_scripts/run_TMM_scale_matrix.pl --matrix $TPM_matrix_file > $out_prefix.$cross_sample_norm.EXPR.matrix";
														
 
															+            &process_cmd($cmd);
														
 
															+        }
														
 
															+        elsif ($cross_sample_norm =~ /^UpperQuartile$/) {
														
 
															+            my $cmd = "$FindBin::RealBin/support_scripts/run_UpperQuartileNormalization_matrix.pl --matrix $TPM_matrix_file > $out_prefix.$cross_sample_norm.EXPR.matrix";
														
 
															+            &process_cmd($cmd);
														
 
															+        }
														
 
															+        elsif ($cross_sample_norm =~ /^none$/i) {
														
 
															+            print STDERR "-not performing cross-sample normalization.\n";
														
 
															+        }
														
 
															+    }
														
 
															+    else {
														
 
															+        unless (scalar @files == 1) { 
														
 
															+            die "Error, no target samples. Shouldn't get here.";
														
 
															+        }
														
 
															+        print STDERR "Warning, only one sample, so not performing cross-sample normalization\n";
														
 
															+    }
														
 
															+    print STDERR "Done.\n\n";
														
 
															+    
														
 
															+    exit(0);
														
 
															+}
														
 
															+
														
 
															+####
														
 
															+sub process_cmd {
														
 
															+    my ($cmd) = @_;
														
 
															+
														
 
															+    print STDERR $cmd;
														
 
															+    my $ret = system($cmd);
														
 
															+    if ($ret) {
														
 
															+        die "Error, CMD: $cmd died with ret $ret";
														
 
															+    }
														
 
															+
														
 
															+    return;
														
 
															+}
														
 
															+        
														
 
															+
														
 
															+####
														
 
															+sub parse_field_positions {
														
 
															+    my ($header) = @_;
														
 
															+
														
 
															+    my %field_pos;
														
 
															+    my @fields = split(/\s+/, $header);
														
 
															+    for (my $i = 0; $i <= $#fields; $i++) {
														
 
															+        $field_pos{$fields[$i]} = $i;
														
 
															+        $field_pos{$i} = $i; # for fixed column assignment
														
 
															+    }
														
 
															+
														
 
															+    return(%field_pos);
														
 
															+}
														
--- a/run-featurecounts.R
+++ b/run-featurecounts.R
@@ -0,0 +1,39 @@
 
															+#!/usr/bin/env Rscript
														
 
															+# parse parameter ---------------------------------------------------------
														
 
															+library(argparser, quietly=TRUE)
														
 
															+# Create a parser
														
 
															+p <- arg_parser("run featureCounts and calculate FPKM/TPM")
														
 
															+
														
 
															+# Add command line arguments
														
 
															+p <- add_argument(p, "--bam", help="input: bam file", type="character")
														
 
															+p <- add_argument(p, "--gtf", help="input: gtf file", type="character")
														
 
															+p <- add_argument(p, "--featureType", help="a character string or a vector of character strings giving the feature type or types used to select rows in the GTF annotation which will be used for read summarization", type="character", default="exon")
														
 
															+p <- add_argument(p, "--attrType", help="a character string giving the attribute type in the GTF annotation which will be used to group features (eg. exons) into meta-features", type="character", default="gene_id")
														
 
															+p <- add_argument(p, "--isPairedEnd", help="indicating whether libraries contain paired-end reads or not", type="logical", default=TRUE)
														
 
															+p <- add_argument(p, "--output", help="output prefix", type="character")
														
 
															+
														
 
															+# Parse the command line arguments
														
 
															+argv <- parse_args(p)
														
 
															+
														
 
															+library(Rsubread)
														
 
															+library(limma)
														
 
															+library(edgeR)
														
 
															+
														
 
															+bamFile <- argv$bam
														
 
															+gtfFile <- argv$gtf
														
 
															+nthreads <- 1
														
 
															+outFilePref <- argv$output
														
 
															+
														
 
															+outStatsFilePath  <- paste(outFilePref, '.log',  sep = ''); 
														
 
															+outCountsFilePath <- paste(outFilePref, '.count', sep = ''); 
														
 
															+
														
 
															+fCountsList = featureCounts(bamFile, annot.ext=gtfFile, isGTFAnnotationFile=TRUE, nthreads=nthreads, GTF.featureType=argv$featureType, GTF.attrType=argv$attrType, isPairedEnd=argv$isPairedEnd)
														
 
															+dgeList = DGEList(counts=fCountsList$counts, genes=fCountsList$annotation)
														
 
															+fpkm = rpkm(dgeList, dgeList$genes$Length)
														
 
															+tpm = exp(log(fpkm) - log(sum(fpkm)) + log(1e6))
														
 
															+
														
 
															+write.table(fCountsList$stat, outStatsFilePath, sep="\t", col.names=FALSE, row.names=FALSE, quote=FALSE)
														
 
															+
														
 
															+featureCounts = cbind(fCountsList$annotation[,1], fCountsList$counts, fpkm, tpm)
														
 
															+colnames(featureCounts) = c('gene_id', 'counts', 'fpkm','tpm')
														
 
															+write.table(featureCounts, outCountsFilePath, sep="\t", col.names=TRUE, row.names=FALSE, quote=FALSE)
														
--- a/support_scripts/run_TMM_scale_matrix.pl
+++ b/support_scripts/run_TMM_scale_matrix.pl
@@ -0,0 +1,201 @@
 
															+#!/usr/bin/env perl
														
 
															+
														
 
															+use strict;
														
 
															+use warnings;
														
 
															+use Carp;
														
 
															+use Getopt::Long qw(:config no_ignore_case bundling);
														
 
															+use Cwd;
														
 
															+use FindBin;
														
 
															+use File::Basename;
														
 
															+use lib ("$FindBin::RealBin/../../PerlLib");
														
 
															+use Data::Dumper;
														
 
															+
														
 
															+my $usage = <<__EOUSAGE__;
														
 
															+
														
 
															+
														
 
															+
														
 
															+#################################################################################################
														
 
															+#
														
 
															+#  Required:
														
 
															+#
														
 
															+#  --matrix <string>      matrix of raw read counts (not normalized!)
														
 
															+#
														
 
															+################################################################################################
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+__EOUSAGE__
														
 
															+
														
 
															+
														
 
															+    ;
														
 
															+
														
 
															+
														
 
															+
														
 
															+my $matrix_file;
														
 
															+my $help_flag;
														
 
															+
														
 
															+&GetOptions ( 'h' => \$help_flag,
														
 
															+              'matrix=s' => \$matrix_file,
														
 
															+              );
														
 
															+
														
 
															+
														
 
															+if ($help_flag) {
														
 
															+    die $usage;
														
 
															+}
														
 
															+
														
 
															+
														
 
															+unless ($matrix_file) {
														
 
															+    die $usage;
														
 
															+}
														
 
															+
														
 
															+
														
 
															+
														
 
															+main: {
														
 
															+    
														
 
															+    my $tmm_info_file = &run_TMM($matrix_file);
														
 
															+    
														
 
															+    &write_normalized_file($matrix_file, $tmm_info_file);
														
 
															+    
														
 
															+    exit(0);
														
 
															+}
														
 
															+
														
 
															+
														
 
															+####
														
 
															+sub run_TMM {
														
 
															+    my ($counts_matrix_file) = @_;
														
 
															+    
														
 
															+    my $tmm_norm_script = "__tmp_runTMM.R";
														
 
															+    open (my $ofh, ">$tmm_norm_script") or die "Error, cannot write to $tmm_norm_script";
														
 
															+    #print $ofh "source(\"$FindBin::RealBin/R/edgeR_funcs.R\")\n";
														
 
															+    
														
 
															+    print $ofh "library(edgeR)\n\n";
														
 
															+    
														
 
															+    print $ofh "rnaseqMatrix = read.table(\"$counts_matrix_file\", header=T, row.names=1, com='', check.names=F)\n";
														
 
															+    print $ofh "rnaseqMatrix = as.matrix(rnaseqMatrix)\n";
														
 
															+    print $ofh "rnaseqMatrix = round(rnaseqMatrix)\n";
														
 
															+    print $ofh "exp_study = DGEList(counts=rnaseqMatrix, group=factor(colnames(rnaseqMatrix)))\n";
														
 
															+    print $ofh "exp_study = calcNormFactors(exp_study)\n";
														
 
															+    
														
 
															+    print $ofh "exp_study\$samples\$eff.lib.size = exp_study\$samples\$lib.size * exp_study\$samples\$norm.factors\n";
														
 
															+    print $ofh "write.table(exp_study\$samples, file=\"$counts_matrix_file.TMM_info.txt\", quote=F, sep=\"\\t\", row.names=F)\n";
														
 
															+    
														
 
															+    close $ofh;
														
 
															+
														
 
															+    &process_cmd("Rscript $tmm_norm_script 1>&2 ");
														
 
															+    
														
 
															+    return("$counts_matrix_file.TMM_info.txt");
														
 
															+
														
 
															+}
														
 
															+
														
 
															+####
														
 
															+sub process_cmd {
														
 
															+    my ($cmd) = @_;
														
 
															+
														
 
															+    print STDERR "CMD: $cmd\n";
														
 
															+    my $ret = system($cmd);
														
 
															+
														
 
															+    if ($ret) {
														
 
															+        die "Error, cmd: $cmd died with ret ($ret) ";
														
 
															+    }
														
 
															+
														
 
															+    return;
														
 
															+}
														
 
															+
														
 
															+####
														
 
															+sub write_normalized_file {
														
 
															+    my ($matrix_file, $tmm_info_file) = @_;
														
 
															+
														
 
															+    my %col_to_eff_lib_size;
														
 
															+    my %col_to_norm_factor;
														
 
															+    open (my $fh, $tmm_info_file) or die "Error, cannot open file $tmm_info_file";
														
 
															+    
														
 
															+    my %bloom_to_col;
														
 
															+
														
 
															+    my $header = <$fh>;
														
 
															+    while (<$fh>) {
														
 
															+        chomp;
														
 
															+        my @x = split(/\t/);
														
 
															+        my ($col, $norm_factor, $eff_lib_size) = ($x[0], $x[2], $x[3]);
														
 
															+        
														
 
															+        $col =~ s/\"//g;
														
 
															+        
														
 
															+        my $bloom = $col;
														
 
															+        $bloom =~ s/\W/$;/g;
														
 
															+        
														
 
															+        $col_to_eff_lib_size{$col} = $eff_lib_size;
														
 
															+        $col_to_norm_factor{$col} = $norm_factor;
														
 
															+    
														
 
															+        if ($bloom ne $col) {
														
 
															+
														
 
															+            if (exists $bloom_to_col{$bloom}) {
														
 
															+                die "Error, already stored $bloom_to_col{$bloom} for $bloom, but trying to also store $col here... Ensure column names are unique according to non-word characters.";
														
 
															+            }
														
 
															+
														
 
															+            $col_to_eff_lib_size{$bloom} = $eff_lib_size;
														
 
															+            $col_to_norm_factor{$bloom} = $norm_factor;
														
 
															+        }
														
 
															+        
														
 
															+    }
														
 
															+    close $fh;
														
 
															+    
														
 
															+    open ($fh, $matrix_file);
														
 
															+    $header = <$fh>;
														
 
															+    chomp $header;
														
 
															+    my @pos_to_col = split(/\t/, $header);
														
 
															+    my $check_column_ordering_flag = 0;
														
 
															+    
														
 
															+    while (<$fh>) {
														
 
															+        chomp;
														
 
															+        my @x = split(/\t/);
														
 
															+        
														
 
															+        unless ($check_column_ordering_flag) {
														
 
															+            if (scalar(@x) == scalar(@pos_to_col) + 1) {
														
 
															+                ## header is offset, as is acceptable by R
														
 
															+                ## not acceptable here.  fix it:
														
 
															+                unshift (@pos_to_col, "");
														
 
															+            }
														
 
															+            $check_column_ordering_flag = 1;
														
 
															+            print join("\t", @pos_to_col) . "\n";
														
 
															+        }
														
 
															+        
														
 
															+
														
 
															+        my $gene = $x[0];
														
 
															+        
														
 
															+        print $gene;
														
 
															+        for (my $i = 1; $i <= $#x; $i++) {
														
 
															+            my $col = $pos_to_col[$i];
														
 
															+            
														
 
															+            $col =~ s/\"//g;
														
 
															+            
														
 
															+            my $adj_col = $col;
														
 
															+            $adj_col =~ s/-/\./g;
														
 
															+            
														
 
															+            my $bloom = $col;
														
 
															+            $bloom =~ s/\W/$;/g;
														
 
															+
														
 
															+            my $eff_lib_size = $col_to_eff_lib_size{$col} 
														
 
															+            || $col_to_eff_lib_size{$bloom}
														
 
															+            || $col_to_eff_lib_size{$adj_col}
														
 
															+            || $col_to_eff_lib_size{"X$col"} 
														
 
															+            || die "Error, no eff lib size for [$col] or [$bloom] or [$adj_col] or [\"X$col\"]" . Dumper(\%col_to_eff_lib_size);
														
 
															+            
														
 
															+            my $norm_factor = $col_to_norm_factor{$col}
														
 
															+            || $col_to_norm_factor{$bloom}
														
 
															+            || $col_to_norm_factor{$adj_col}
														
 
															+            || $col_to_norm_factor{"X$col"}
														
 
															+            || die "Error, no normalization scaling factor for $col" . Dumper(\%col_to_norm_factor);
														
 
															+            
														
 
															+
														
 
															+            my $read_count = $x[$i];
														
 
															+    
														
 
															+            my $converted_val = sprintf("%.3f", $read_count * 1/$norm_factor);
														
 
															+            
														
 
															+            print "\t$converted_val";
														
 
															+        }
														
 
															+        print "\n";
														
 
															+    }
														
 
															+    
														
 
															+    return;
														
 
															+
														
 
															+}