extern crate getopts;
extern crate fasten;
extern crate fastq;
extern crate bam;
use bam::RecordReader;
use fasten::fasten_base_options;
use fasten::fasten_base_options_matches;
use fasten::logmsg;
use std::io::stdin;
use fastq::{Parser, Record};
use std::sync::mpsc::channel;
#[derive(Debug, Clone)]
struct FastenSeq {
id1: String,
seq1: String,
qual1: String,
id2: String,
seq2: String,
qual2: String,
}
impl FastenSeq{
fn new() -> FastenSeq{
return FastenSeq{
id1: String::new(),
seq1: String::new(),
qual1: String::new(),
id2: String::new(),
seq2: String::new(),
qual2: String::new(),
};
}
fn as_fastq(&self) -> String {
let mut entry:String = format!("@{}\n{}\n+\n{}",
self.id1, self.seq1, self.qual1);
if !self.id2.is_empty() {
entry = format!("{}\n@{}\n{}\n+\n{}",
entry, self.id2, self.seq2, self.qual2);
}
return entry;
}
fn as_fasta(&self) -> String {
let mut entry:String = format!(">{}\n{}",
self.id1, self.seq1);
if !self.id2.is_empty() {
entry = format!("{}\n>{}\n{}",
entry, self.id2, self.seq2);
}
return entry;
}
fn as_sam(&self) -> String {
let mut flag = "4"; if !self.id2.is_empty() {
flag = "77"; }
let mut entry:String = vec![self.id1.as_str(), flag, "*", "0", "0", "*", "*", "0", "0", self.seq1.as_str(), self.qual1.as_str()].join("\t");
if !self.id2.is_empty() {
let entry2:String = vec![self.id2.as_str(), "141", "*", "0", "0", "*", "*", "0", "0", self.seq2.as_str(), self.qual2.as_str()].join("\t");
entry = format!("{}\n{}", entry, entry2);
}
return entry;
}
}
fn main(){
let mut opts = fasten_base_options();
opts.optopt("i", "in-format", "The input format for stdin. FORMAT can be: fastq, fasta, sam.", "FORMAT");
opts.optopt("o", "out-format", "The output format for stdin. See --in-format for FORMAT options.", "FORMAT");
let description:String = format!("Converts between sequence formats.");
let matches = fasten_base_options_matches(&description, opts);
let paired_end = matches.opt_present("paired-end");
let in_format = matches.opt_default("in-format", "fastq")
.unwrap_or(String::from("fastq"))
.to_lowercase();
let out_format = matches.opt_default("out-format","fastq")
.unwrap_or(String::from("fastq"))
.to_lowercase();
let (tx, rx):(std::sync::mpsc::Sender<FastenSeq>,std::sync::mpsc::Receiver<FastenSeq>) = channel();
match in_format.as_str() {
"fastq" => {read_fastq(tx, paired_end);}
"sam" => {read_sam(tx, paired_end);}
"fasta" => {read_fasta(tx, paired_end);}
_ => {panic!("Unknown input format {}", in_format);}
};
match out_format.as_str() {
"fastq" => {write_fastq(rx);}
"fasta" => {write_fasta(rx);}
"sam" => {write_sam(rx);}
_ => {panic!("Unknown output format {}", out_format);}
};
}
fn read_fasta(tx:std::sync::mpsc::Sender<FastenSeq>, paired_end:bool){
if paired_end {
logmsg("--paired-end was given but it is ignored in a fasta context");
}
let stdin = stdin();
let mut seq:FastenSeq = FastenSeq::new();
let bytes = stdin.read_line(&mut seq.id1).unwrap();
seq.id1 = seq.id1.trim().to_string();
if bytes == 0 {
panic!("Zero byte stdin");
}
let gt_sign = seq.id1.remove(0);
if gt_sign != '>' {
panic!("ERROR: first character of first line of stdin is not '>' as expected. Found {}", gt_sign);
}
loop{
let mut buffer = String::new();
let bytes = stdin.read_line(&mut buffer).unwrap();
buffer = buffer.trim().to_string();
if bytes == 0 {
seq.qual1 = (0..seq.seq1.len()).map(|_| "I").collect::<String>();
tx.send(seq).expect("Sending seq object to writer");
break;
}
else if &buffer[0..1]==">" {
seq.qual1 = (0..seq.seq1.len()).map(|_| "I").collect::<String>();
tx.send(seq).expect("Sending seq object to writer");
seq = FastenSeq::new();
seq.id1 = buffer;
let gt_sign = seq.id1.remove(0);
if gt_sign != '>' {
panic!("ERROR: first character of first line of stdin is not '>' as expected. Found {}", gt_sign);
}
}
else {
seq.seq1 = format!("{}{}", seq.seq1, buffer);
}
}
}
fn read_sam(tx:std::sync::mpsc::Sender<FastenSeq>, paired_end:bool){
if paired_end {
logmsg("--paired-end given but paired-endedness will be determined by sam format flags");
}
let mut reader = bam::SamReader::from_path("/dev/stdin").unwrap();
let mut r = bam::Record::new();
loop {
let mut seq:FastenSeq = FastenSeq::new();
match reader.read_into(&mut r) {
Ok(false) => break,
Ok(true) => {},
Err(e) => panic!("{}", e),
}
seq.id1 = String::from(std::str::from_utf8(r.name()).unwrap());
seq.seq1 = String::from_utf8(r.sequence().to_vec()).unwrap();
seq.qual1 = String::from_utf8(r.qualities().to_readable()).unwrap();
if r.flag().is_paired() {
match reader.read_into(&mut r) {
Ok(false) => break,
Ok(true) => {},
Err(e) => panic!("{}", e),
}
seq.id2 = String::from(std::str::from_utf8(r.name()).unwrap());
seq.seq2 = String::from_utf8(r.sequence().to_vec()).unwrap();
seq.qual2 = String::from_utf8(r.qualities().to_readable()).unwrap();
}
tx.send(seq).expect("Sending seq object to writer");
}
}
fn read_fastq(tx:std::sync::mpsc::Sender<FastenSeq>, paired_end:bool){
let parser = Parser::new(stdin());
let mut parser_getter = parser.ref_iter();
parser_getter.advance().expect("Could not read the first fastq entry");
while let Some(record1) = parser_getter.get() {
let mut seq:FastenSeq = FastenSeq::new();
seq.id1 = std::str::from_utf8(record1.head()).unwrap().to_string();
seq.seq1 = std::str::from_utf8(record1.seq()).unwrap().to_string();
seq.qual1 = std::str::from_utf8(record1.qual()).unwrap().to_string();
if paired_end {
match &parser_getter.advance() {
Ok(_) => {},
Err(err) => {
panic!("ERROR: could not read the second entry in a paired end read: {}", err);
}
};
let record2 = &parser_getter.get().expect("ERROR parsing second pair in a paired end read");
seq.id2 = std::str::from_utf8(record2.head()).unwrap().to_string();
seq.seq2 = std::str::from_utf8(record2.seq()).unwrap().to_string();
seq.qual2 = std::str::from_utf8(record2.qual()).unwrap().to_string();
}
tx.send(seq).expect("Sending seq object to writer");
match &parser_getter.advance() {
Ok(_) => {},
Err(_) => {break;}
};
}
}
fn write_fastq(rx:std::sync::mpsc::Receiver<FastenSeq>){
let receiver = rx.iter();
for seq in receiver {
println!("{}", seq.as_fastq());
}
}
fn write_fasta(rx:std::sync::mpsc::Receiver<FastenSeq>){
let receiver = rx.iter();
for seq in receiver {
println!("{}", seq.as_fasta());
}
}
fn write_sam(rx:std::sync::mpsc::Receiver<FastenSeq>){
let receiver = rx.iter();
for seq in receiver {
println!("{}", seq.as_sam());
}
}