1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
//! Blunt-end trims using 0-based coordinates
//!
//! # Examples
//!
//! ## Trim five bases from the right side
//! ```bash
//! cat file.fastq | fasten_trim -l -5 > trimmed.fastq
//! ```
//!
//! ## Keep a maximum of 100bp
//! ```bash
//! cat file.fastq | fasten_trim -l 99 > trimmed.fastq
//! ```
//! ## Trim 5bp from the left side
//! ```bash
//! cat file.fastq | fasten_trim -f 5 > trimmed.fastq
//! ```
//!
//! # Usage
//!
//! ```text
//! Usage: fasten_trim [-h] [-n INT] [-p] [-v] [-f INT] [-l INT]
//!
//! Options:
//! -h, --help Print this help menu.
//! -n, --numcpus INT Number of CPUs (default: 1)
//! -p, --paired-end The input reads are interleaved paired-end
//! -v, --verbose Print more status messages
//! -f, --first-base INT
//! The first base to keep (default: 0)
//! -l, --last-base INT The last base to keep. If negative, counts from the
//! right. (default: 0)
//! ```
extern crate fasten;
extern crate statistical;
extern crate getopts;
extern crate threadpool;
use std::fs::File;
use std::io::BufReader;
use std::cmp::min;
use fasten::fasten_base_options;
use fasten::fasten_base_options_matches;
use fasten::logmsg;
use fasten::io::fastq;
use fasten::io::seq::Seq;
/// Entry point: read fastq entries from stdin, blunt-end trim each one with
/// `trim_worker`, and print the trimmed entries to stdout.
fn main(){
    // Begin with the options shared by the fasten executables, then register
    // the two coordinates that are specific to this script.
    let mut opts = fasten_base_options();
    opts.optopt("f","first-base","The first base to keep (default: 0)","INT");
    opts.optopt("l","last-base","The last base to keep (default: 0)","INT");
    let matches = fasten_base_options_matches("Blunt-end trims using 0-based coordinates", opts);

    // Both coordinates are read the same way: 0 when the flag is absent,
    // otherwise the flag's value parsed as an unsigned integer.
    // The panic messages are passed in so each option keeps its own wording.
    let read_coordinate = |name: &str, unreadable_msg: &str, not_int_msg: &str| -> usize {
        if !matches.opt_present(name) {
            return 0;
        }
        matches
            .opt_str(name)
            .expect(unreadable_msg)
            .parse::<usize>()
            .expect(not_int_msg)
    };

    let first_base:usize = read_coordinate(
        "first-base",
        "ERROR: could not understand parameter --first-base",
        "ERROR: --first-base is not an INT",
    );
    let last_base:usize = read_coordinate(
        "last-base",
        "ERROR: could not understand parameter --last-base",
        "ERROR: --last-base is not an INT",
    );

    // The value of --numcpus is never read: the script always runs on one
    // cpu and only warns when the flag was supplied.
    if matches.opt_present("numcpus") {
        logmsg("Warning: multithreading this script currently slows it down. Resetting to 1 cpu. Avoid this warning by not using --numcpus");
    }
    let _num_cpus:usize = 1;

    // Read from stdin
    let stdin_file = File::open("/dev/stdin").expect("Could not open file");
    let fastq_reader = fastq::FastqReader::new(BufReader::new(stdin_file));

    // Trim and print one entry at a time.
    for entry in fastq_reader.into_iter() {
        println!("{}", trim_worker(entry, first_base, last_base));
    }
}
/// Blunt-end trim a single fastq entry and return it formatted as text.
///
/// # Arguments
///
/// * `seq` - the entry to trim; its `id`, `seq` and `qual` fields are read.
/// * `first_base` - 0-based index of the first base to keep.
/// * `last_base` - 0-based index of the last base to keep, inclusive.
///   A value of `0` means "not specified": keep through the end of the read.
///   The parameter is unsigned, so offsets counted from the right cannot be
///   expressed here.
///
/// # Returns
///
/// The id, trimmed sequence, a `+` line and the trimmed quality string,
/// joined by newlines with no trailing newline. Coordinates that fall outside
/// the read are clamped, so a read shorter than `first_base` (or an empty
/// read) yields empty sequence and quality lines instead of a panic.
fn trim_worker(seq:Seq, first_base:usize, last_base:usize ) -> String {
    let seq_len = seq.seq.len();

    // Exclusive end of the slice. `last_base` is the index of the last base
    // to KEEP, so the half-open range must stop one past it; it is also
    // clamped to the read length. `saturating_add` guards `usize::MAX`.
    let end = match last_base {
        // Position not specified: keep through the end of the read.
        0 => seq_len,
        _ => min(seq_len, last_base.saturating_add(1)),
    };

    // Never start past the end; this turns an out-of-range `first_base`
    // into an empty slice rather than a slicing panic.
    let start = min(first_base, end);

    let sequence = &seq.seq[start..end];
    // NOTE(review): assumes `qual` is at least as long as `seq`, as in a
    // well-formed fastq entry — a shorter quality string still panics here.
    let quality = &seq.qual[start..end];

    format!("{}\n{}\n+\n{}", seq.id, sequence, quality)
}