1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190
//! Determine paired-end-ness in an interleaved file.
//! Exit code of 0 indicates PE. Exit code > 0 indicates single end.
//!
//! # Examples
//!
//! ## Test the file and then print a message with the exit code
//! ```bash
//! cat file.fastq | fasten_pe; echo "Reads were paired-end? $?";
//! ```
//! ## Test the file and if it is paired end (exit code 0), then print a message
//! ```bash
//! cat file.fastq | fasten_pe || echo "Reads were paired end.";
//! ```
//!
//! # Usage
//!
//! ```text
//! Usage: fasten_pe [-h] [-n INT] [-p] [-v] [--print-reads]
//!
//! Options:
//! -h, --help Print this help menu.
//! -n, --numcpus INT Number of CPUs (default: 1)
//! -p, --paired-end The input reads are interleaved paired-end
//! -v, --verbose Print more status messages
//! --print-reads Print each read. Useful for Unix pipes.
//! ```
extern crate fasten;
extern crate regex;
use fasten::fasten_base_options;
use fasten::fasten_base_options_matches;
use fasten::logmsg;
use regex::Regex;
use std::fs::File;
use std::io::BufReader;
use std::io::BufRead;
fn main() {
let mut opts = fasten_base_options();
opts.optflag("","print-reads","Print each read. Useful for Unix pipes.");
let matches = fasten_base_options_matches("Determine paired-end-ness in an interleaved file. Exit code of 0 indicates PE. Exit code > 0 indicates SE.", opts);
if matches.opt_present("paired-end") {
logmsg("WARNING: --paired-end was supplied but this script is supposed to determine whether the input is paired-end.");
}
let should_print = matches.opt_present("print-reads");
// Save the top X ID pairs in a vector
// and compare them in several functions that test for
// IDs. If any test returns true, then we can say that
// it is paired-end input.
let mut id1_vec :Vec<String>=Vec::new();
let mut id2_vec :Vec<String>=Vec::new();
let my_file = File::open("/dev/stdin").expect("Could not open file");
let my_buffer=BufReader::new(my_file);
let mut lines = my_buffer.lines();
while let Some(line) = lines.next() {
let id1 = line.expect("ERROR parsing id line in R1");
let mut entry=id1.clone();
entry.push('\n');
for _ in 1..4 { // move ahead three lines
let other_line = lines.next()
.expect("ERROR getting next line")
.expect("ERROR parsing next line");
entry.push_str(&other_line);
entry.push('\n');
}
let id2 = lines.next()
.expect("ERROR getting R2. This is not a paired end file.")
.expect("ERROR parsing next line in R2");
entry.push_str(&id2);
entry.push('\n');
for _ in 1..4 { // move ahead three lines
let other_line = lines.next()
.expect("ERROR getting next line in R2. This is not a paired end file.")
.expect("ERROR parsing next line in R2");
entry.push_str(&other_line);
entry.push('\n');
}
id1_vec.push(id1);
id2_vec.push(id2);
if should_print {
print!("{}",entry);
}
}
// For every function we can think of, throw the ID
// vectors against them. If any of them return >0,
// then we have a positive
let mut is_paired_end :u8 = 0;
is_paired_end += is_paired_end_slash12(&id1_vec, &id2_vec);
is_paired_end += is_paired_end_miseq(&id1_vec, &id2_vec);
// If this is not paired end, return a nonzero
if is_paired_end == 0 {
if matches.opt_present("verbose") {
logmsg("I do not think this is interleaved paired end");
}
std::process::exit(1);
}
if matches.opt_present("verbose") {
logmsg("The fastq input seems to be interleaved paired-end");
}
}
/// Detect whether the vector of IDs represent paired-endedness
/// with finding out whether they end with `/1` and `/2`.
fn is_paired_end_slash12(id1_ref: &Vec<String>, id2_ref: &Vec<String>) -> u8 {
// We are using iterators and want to reset them
let id1_vec = id1_ref.clone();
let id2_vec = id2_ref.clone();
// @.../1
let slash_r1r2_regex = Regex::new(r"(.+)/([12])$").expect("malformed qual regex");
let mut id2_iter = id2_vec.iter();
for id1 in id1_vec.iter(){
let id2 = id2_iter.next()
.expect("ERROR getting next id2");
let caps1_result = slash_r1r2_regex.captures(&id1);
let caps2_result = slash_r1r2_regex.captures(&id2);
// Test for whether the regex matched
if caps1_result.is_none() || caps2_result.is_none() {
return 0;
}
// If we have a match then get the captured values
let caps1 = caps1_result
.expect("ERROR: could not regex against id1");
let caps2 = caps2_result
.expect("ERROR: could not regex against id2");
// Make sure the base name matches
if caps1[1] != caps2[1] {
return 0;
}
// Make sure there is a 1/2 combo
if &caps1[2] != "1" || &caps2[2] != "2" {
return 0;
}
}
return 1;
}
/// Detect whether the vector of IDs represent paired-endedness
/// with finding out whether they fit the pattern shown in Illumina documentation at
/// <http://support.illumina.com/content/dam/illumina-support/help/BaseSpaceHelp_v2/Content/Vault/Informatics/Sequencing_Analysis/BS/swSEQ_mBS_FASTQFiles.htm>
fn is_paired_end_miseq(id1_ref: &Vec<String>, id2_ref: &Vec<String>) -> u8 {
// We are using iterators and want to reset them
let id1_vec = id1_ref.clone();
let id2_vec = id2_ref.clone();
let mut id2_iter = id2_vec.iter();
for id1 in id1_vec.iter(){
let id2 = id2_iter.next()
.expect("ERROR getting next id2");
// Get the 7th field which is a combination of the
// Y position and the read number, separated by a
// space.
let id1_tmp = id1.split(":").nth(6);
let id2_tmp = id2.split(":").nth(6);
if id1_tmp.is_none() || id2_tmp.is_none() {
return 0;
}
// Get the read number. It has to be 1 and 2.
let id1_read_number = id1_tmp.unwrap().split(" ").nth(1);
let id2_read_number = id2_tmp.unwrap().split(" ").nth(1);
if id1_read_number.is_none() || id2_read_number.is_none() {
return 0;
}
if id1_read_number.unwrap() != "1" || id2_read_number.unwrap() != "2" {
return 0;
}
}
return 1;
}