fasten_metrics/
fasten_metrics.rsextern crate fasten;
extern crate statistical;
extern crate getopts;
use std::fs::File;
use std::io::BufReader;
use std::io::BufRead;
use std::f64;
use fasten::fasten_base_options;
use fasten::fasten_base_options_matches;
use fasten::logmsg;
#[test]
fn test_average_quality () {
let easy_qual = "IIIIIIIIIIII";
let easy_avg_obs = average_quality(easy_qual);
let easy_avg_exp:f64 = 40.0;
assert_eq!(easy_avg_obs, easy_avg_exp, "Tried to calculate average quality for {}", easy_qual);
let hard_qual = "8AB*2D>C1'02C+=I@IEFHC7&-E5',I?E*33E/@3#68B%\"!B-/2%(G=*@D052IA!('7-*$+A6>.$89,-CG71=AGAE3&&#=2B.+I<E";
let hard_avg_obs = average_quality_from_cigar(hard_qual);
let hard_avg_exp:f64 = 21.40;
assert_eq!(hard_avg_obs, hard_avg_exp, "Tried to calculate the average quality for {}", hard_qual);
}
fn main(){
let mut opts = fasten_base_options();
opts.optflag("","each-read","Print the metrics for each read. This creates very large output");
let matches = fasten_base_options_matches("Gives read metrics on a read set.", opts);
if matches.opt_present("paired-end") {
logmsg("WARNING: --paired-end is not utilized in this script");
}
let each_read :bool=matches.opt_present("each-read");
let filename = "/dev/stdin";
if each_read {
println!("readID\treadLength\tavgQual");
} else {
println!("{}",vec!["totalLength", "numReads", "avgReadLength","avgQual"].join("\t"));
}
let mut read_length :Vec<f64> = vec![];
let mut read_qual :Vec<u8> = vec![];
let mut num_lines :u64 =0;
let my_file = File::open(&filename).expect("Could not open file");
let my_buffer=BufReader::new(my_file);
for line in my_buffer.lines() {
num_lines+=1;
let mod_line = num_lines % 4;
match mod_line {
1 => {
if each_read {
let id = line.expect("Expected an ID line");
print!("{}\t",&id[1..]);
}
}
2 => {
let my_read_length=line.expect("Expected a sequence line").len() as f64;
if each_read {
print!("{}\t",my_read_length);
}
read_length.push(my_read_length);
}
0 => {
let qual_line=line.expect("Expected a qual line");
let my_qual_vec: Vec<u8> = qual_line.into_bytes();
if each_read {
let my_avg_qual = avg_qual(&my_qual_vec, 33);
println!("{:.2}",my_avg_qual);
}
read_qual.extend(my_qual_vec.into_iter());
}
_ => {
}
};
}
let num_reads :u64 = num_lines / 4;
let total_length = read_length.iter().fold(0.0,|a,&b| a+b);
let mut summary_metrics=vec![total_length.to_string(),num_reads.to_string()];
let total_length_str = (total_length as f64/num_reads as f64).to_string();
let total_qual_str = format!("{:.2}", avg_qual(&read_qual, 33));
summary_metrics.push(total_length_str);
summary_metrics.push(total_qual_str.to_string());
if !each_read {
println!("{}", summary_metrics.join("\t"));
}
}
fn avg_qual(qual: &[u8], ascii_base: u8) -> f64 {
if qual.is_empty() {
return 0.0;
}
let qual_values: Vec<f64> = qual.iter()
.map(|&q| {
let phred = (q - ascii_base) as i16;
10_f64.powf(-phred as f64 / 10.0)
})
.collect();
if qual_values.is_empty() {
return 0.0;
}
let sum: f64 = qual_values.iter().sum();
let avg_prob = sum / qual_values.len() as f64;
-10.0 * avg_prob.log10()
}