package bio_io

  1. Overview
  2. Docs

In_channel for Btab files where each query in the file is a single record.

Overview

WARNING: This module assumes that queries are sorted. One case in which this assumption does not hold is with mmseqs when using more than one iteration. E.g., mmseqs easy-search --num-iterations 3. This behavior will likely change in the future.

You should consider this module experimental.

Example

Here is a short example program. It reads a btab file and prints out the records.

open! Base
open! Bio_io.Btab_queries

let parse_argv () =
  match Sys.get_argv () with
  | [| _; file_name |] -> file_name
  | _ -> failwith "missing file_name"

let () =
  let file_name = parse_argv () in
  In_channel.with_file_iter_records_exn file_name ~f:(fun r ->
      Stdio.print_endline "===";
      Stdio.print_endline @@ Record.query r;
      let hits = List.map ~f:Bio_io.Btab.Record.parse @@ Record.hits r in
      Stdio.print_s @@ [%sexp_of: Bio_io.Btab.Record.Parsed.t list] hits)

The output will be something like this:

===
Q 1
(((query "Q 1") (target q1t1) (pident 0.1) (alnlen 2) (mismatch 3)
  (gapopen 4) (qstart 5) (qend 6) (tstart 7) (tend 8) (evalue 9.99E-05)
  (bits 10) (qlen ()) (tlen ()))
 ((query "Q 1") (target q1t2) (pident 0.11) (alnlen 12) (mismatch 13)
  (gapopen 14) (qstart 15) (qend 16) (tstart 17) (tend 18) (evalue 1.9E-05)
  (bits 20) (qlen ()) (tlen ())))
===
Q_2
(((query Q_2) (target q2t1) (pident 0.21) (alnlen 22) (mismatch 23)
  (gapopen 24) (qstart 25) (qend 26) (tstart 27) (tend 28) (evalue 2.9E-05)
  (bits 30) (qlen ()) (tlen ())))
module T : sig ... end
include module type of struct include T end
include module type of struct include Private.Peekable_in_channel end

A wrapper of Jane Street's Stdio.In_channel. Adds peek_char and peek_line functions that work on FIFOs as well as regular files.

Used internally for bio input channels so that you can pipe directly from gzip even in channels that need peeking.

Differences from Stdio.In_channel

  • No binary mode

The following functions are not implemented:

  • input
  • really_input
  • really_input_exn
  • input_char
  • input_byte
  • input_binary_int
  • unsafe_input_value
  • input_buffer
  • seek
  • pos
  • length
  • set_binary_mode_out
include Ppx_compare_lib.Equal.S with type t := t
val input_all : t -> Base.string
val input_line : ?fix_win_eol:Base.bool -> t -> Base.string Base.option
val input_line_exn : ?fix_win_eol:Base.bool -> t -> Base.string
val fold_lines : ?fix_win_eol:Base.bool -> t -> init:'a -> f:('a -> Base.string -> 'a) -> 'a
val input_lines : ?fix_win_eol:Base.bool -> t -> Base.string Base.list
val iter_lines : ?fix_win_eol:Base.bool -> t -> f:(Base.string -> Base.unit) -> Base.unit
val read_lines : ?fix_win_eol:Base.bool -> Base.string -> Base.string Base.list
val read_all : Base.string -> Base.string

Both peek functions are safe to call in the context of one of the iterator functions.

val peek_char : ?fix_win_eol:Base.bool -> t -> Base.char Base.option
val peek_line : ?fix_win_eol:Base.bool -> t -> Base.string Base.option
type record = Record.t
include sig ... end
val stdin : T.t
val create_exn : Base.string -> T.t
val create : Base.string -> T.t Base.Or_error.t
val close_exn : T.t -> Base.unit
val close : T.t -> Base.unit Base.Or_error.t
val with_file_exn : Base.string -> f:(T.t -> 'a) -> 'a
val with_file : Base.string -> f:(T.t -> 'a) -> 'a Base.Or_error.t
val equal : T.t -> T.t -> Base.bool
val input_record_exn : T.t -> T.record Base.option
val input_record : T.t -> T.record Base.option Base.Or_error.t
val fold_records_exn : T.t -> init:'a -> f:('a -> T.record -> 'a) -> 'a
val fold_records : T.t -> init:'a -> f:('a -> T.record -> 'a) -> 'a Base.Or_error.t
val foldi_records_exn : T.t -> init:'a -> f:(Base.int -> 'a -> T.record -> 'a) -> 'a
val foldi_records : T.t -> init:'a -> f:(Base.int -> 'a -> T.record -> 'a) -> 'a Base.Or_error.t
val with_file_fold_records_exn : Base.string -> init:'a -> f:('a -> T.record -> 'a) -> 'a
val with_file_fold_records : Base.string -> init:'a -> f:('a -> T.record -> 'a) -> 'a Base.Or_error.t
val with_file_foldi_records_exn : Base.string -> init:'a -> f:(Base.int -> 'a -> T.record -> 'a) -> 'a
val with_file_foldi_records : Base.string -> init:'a -> f:(Base.int -> 'a -> T.record -> 'a) -> 'a Base.Or_error.t
val iter_records_exn : T.t -> f:(T.record -> Base.unit) -> Base.unit
val iter_records : T.t -> f:(T.record -> Base.unit) -> Base.unit Base.Or_error.t
val iteri_records_exn : T.t -> f:(Base.int -> T.record -> Base.unit) -> Base.unit
val iteri_records : T.t -> f:(Base.int -> T.record -> Base.unit) -> Base.unit Base.Or_error.t
val with_file_iter_records_exn : Base.string -> f:(T.record -> Base.unit) -> Base.unit
val with_file_iter_records : Base.string -> f:(T.record -> Base.unit) -> Base.unit Base.Or_error.t
val with_file_iteri_records_exn : Base.string -> f:(Base.int -> T.record -> Base.unit) -> Base.unit
val with_file_iteri_records : Base.string -> f:(Base.int -> T.record -> Base.unit) -> Base.unit Base.Or_error.t
val records_exn : T.t -> T.record Base.List.t
val records : T.t -> T.record Base.List.t Base.Or_error.t
val with_file_records_exn : Base.string -> T.record Base.List.t
val with_file_records : Base.string -> T.record Base.List.t Base.Or_error.t
val record_sequence_exn : T.t -> T.record Base.Sequence.t
val record_sequence : T.t -> T.record Base.Or_error.t Base.Sequence.t
OCaml

Innovation. Community. Security.