1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
//! Interleaves reads from either stdin or file parameters.
//! 
//! Many fasten executables are aware of paired end reads
//! but they need to be in interleaved format.
//! This script transforms R1 and R2 reads into interleaved format.
//! 
//! # Examples
//! 
//! ## Shuffling
//! 
//! ### Simple transformation of R1 and R2 into interleaved
//! ```bash
//! cat file_1.fastq file_2.fastq | fasten_shuffle > interleaved.fastq
//! fasten_shuffle -1 file_1.fastq -2 file_2.fastq > interleaved.fastq
//! ```
//! ### interleave R1 and R2 and pipe it into another executable with --paired-end
//! ```bash
//! cat file_1.fastq file_2.fastq | fasten_randomize --paired-end | head -n 8 > random-pair.fastq
//! ```
//! ### ... or to another executable with --paired-end
//! ```bash
//! cat file_1.fastq file_2.fastq | fasten_sample --paired-end --frequency 0.2 > downsample.20percent.fastq
//! ```
//! 
//! ## Deshuffling
//! 
//! ```bash
//! cat interleaved.fastq | fasten_shuffle -d -1 1.fastq -2 2.fastq
//! ```
//! 
//! # Usage
//! 
//! ```text
//! Usage: fasten_shuffle [-h] [-n INT] [-p] [-v] [-d] [-1 1.fastq] [-2 2.fastq]
//! 
//! Options:
//!     -h, --help          Print this help menu.
//!     -n, --numcpus INT   Number of CPUs (default: 1)
//!     -p, --paired-end    The input reads are interleaved paired-end
//!     -v, --verbose       Print more status messages
//!     -d, --deshuffle     Deshuffle reads from stdin
//!     -1 1.fastq          Forward reads. If deshuffling, reads are written to
//!                         this file.
//!     -2 2.fastq          Forward reads. If deshuffling, reads are written to
//!                         this file.
//! ```

extern crate getopts;
extern crate fasten;
use std::fs::File;
use std::io::Write;
use std::io::BufReader;

use fasten::fasten_base_options;
use fasten::fasten_base_options_matches;
use fasten::io::fastq;
use fasten::io::seq::Cleanable;
use fasten::io::seq::Seq;
use fasten::logmsg;

fn main(){
    let mut opts = fasten_base_options();
    //script-specific flags
    opts.optflag("d","deshuffle","Deshuffle reads from stdin");
    opts.optopt("1","","Forward reads. If deshuffling, reads are written to this file.","1.fastq");
    opts.optopt("2","","Forward reads. If deshuffling, reads are written to this file.","2.fastq");

    let matches = fasten_base_options_matches("Interleaves reads from either stdin or file parameters", opts);
    
    if matches.opt_present("paired-end") {
        logmsg("WARNING: --paired-end was supplied but it is assumed for this script anyway");
    }

    if matches.opt_present("deshuffle") {
        deshuffle(&matches);
    } else {
        shuffle(&matches);
    }
}

/// Read from stdin and deshuffle reads into files
fn deshuffle(matches: &getopts::Matches) -> () {
    
    // Where are we reading to?  Get those filenames.
    let r1_filename = if matches.opt_present("1") {
        matches.opt_str("1").unwrap()
    } else {
        "/dev/stdout".to_string()
    };
    let r2_filename = if matches.opt_present("2") {
        matches.opt_str("2").unwrap()
    } else {
        "/dev/stdout".to_string()
    };

    let mut file1 = File::create(r1_filename).expect("ERROR: could not write to file");
    let mut file2 = File::create(r2_filename).expect("ERROR: could not write to file");

    // read stdin
    let my_file = File::open("/dev/stdin").expect("Could not open file");
    let my_buffer=BufReader::new(my_file);
    let fastq_reader=fastq::FastqReader::new(my_buffer);
    let mut read_counter=0;
    for seq in fastq_reader {
        
        // print to file 1 and to file 2, alternating each Seq
        if read_counter % 2 == 0 {
            write!(file1,"{}\n",seq.to_string()).unwrap();
        } else {
            write!(file2,"{}\n",seq.to_string()).unwrap();
        }
        read_counter+=1;
    }

}

/// Read fastq from stdin and interleave
fn shuffle(matches: &getopts::Matches) -> () {

    // Where are we reading from?  Get those filenames.
    let r1_filename = if matches.opt_present("1") {
        matches.opt_str("1").unwrap()
    } else {
        "/dev/stdin".to_string()
    };
    let r2_filename = if matches.opt_present("2") {
        matches.opt_str("2").unwrap()
    } else {
        "/dev/stdin".to_string()
    };

    // Read 1 first, and read 2 is halfway down.
    // Unfortunately this means that it all goes into ram.
    let mut seqs1 = read_seqs(&r1_filename);
    let mut seqs2 = read_seqs(&r2_filename);
    let mut num_pairs = seqs1.len();

    // If reading R1 from stdin, it is possible that seqs2 
    // is empty. If so, redistribute half the reads from 
    // seqs1 into seqs2.
    if seqs2.len() == 0 {
        num_pairs = ((num_pairs as f32)/2.0).ceil() as usize;
        // put it all into seqs_all and truncate the seqs
        let seqs_all = seqs1.clone();
        seqs1 = vec![];
        let mut seq_idx = 0;
        while seq_idx < num_pairs {
            if seq_idx + num_pairs >= seqs_all.len() {
                logmsg("Looks like one of the R2 reads is missing. Skipping an R1/R2 pair.");
                logmsg("If this is in error, please see fasten_validate --paired-ends");
                num_pairs = seq_idx;
                break;
            }
            seqs1.push(seqs_all[seq_idx].clone());
            seqs2.push(seqs_all[seq_idx + num_pairs].clone());
            seq_idx += 1;
        }
        // Free up some memory
        drop(seqs_all);
    }

    for i in  0..num_pairs  {
        seqs1[i].print();
        seqs2[i].print();
    }

}

/// Read fastq entries from a filename
fn read_seqs(filename: &String) -> Vec<Seq> {

    let my_file = File::open(&filename).expect("Could not open file");
    let my_buffer=BufReader::new(my_file);
    let fastq_reader=fastq::FastqReader::new(my_buffer);
    let seqs :Vec<Seq> = fastq_reader.collect();
    return seqs;
}