(* ocolumn: a version of *BSD's column(1) that doesn't dump core on long lines
   Keith Waclena <http://www.lib.uchicago.edu/keith/>
*)

open Utils
open Printf

(* "for programs that run for a short time but allocate like crazy, *)
(* the default value of Gc.space_overhead is a little too low and causes *)
(* the garbage collector to work unnecessarily hard" *)
let _ = Gc.set { (Gc.get()) with Gc.space_overhead = 100 }

(* print usage message *)
let usage = sprintf "Usage: %s [-i STR] [-m] [-o STR] [-r REGEXP] [-s STR] [-t] [--] file ..."

(* cmdline parameters and related *)

let isep = ref None			(* input field separator *)
let merge = ref false			(* merge adjacent empty fields *)
let osep = ref "  "			(* output field separator *)
let regexp = ref (Some "\t")		(* regular expression split *)
let compiled = ref (Pcre.regexp "")	(* compiled version of !regexp *)
let names = ref []			(* filenames *)
let sepopts = ref 0

let get = function
  | Some x -> x
  | None   -> raise Not_found

(* speclist for Arg.parse cmdline parsing *)
let speclist = [
  ("-i", Arg.String (fun s -> isep := Some s; regexp := None; incr sepopts),
   "STR: input field separator (default: '%s')");
  ("-m", Arg.Unit   (fun () -> merge := true),
   (sprintf ": merge adjacent empty fields (like awk; default: %s)" (if !merge then "true" else "false")));
  ("-o", Arg.String (fun s -> osep := s),
   (sprintf "STR: output field separator (default: '%s')" (String.escaped !osep)));
  ("-r", Arg.String (fun s -> regexp := Some s; isep := None; incr sepopts),
   "REGEXP: field separator as regular expression");
  ("-s", Arg.String (fun s -> isep := Some s; osep := s; regexp := None; incr sepopts),
   "STR: field separator (sets -i and -o)");
  ("-t", Arg.Unit   (fun () -> ()),
   ": noop for compatibility with BSD column");
  ("--", Arg.Rest   (fun name -> names := !names @ [name]),
   ": stop interpreting options");
]

(* return maximum int in a list of ints *)
let maxwidth = List.fold_left max 0

(* our most important data structure is an int list list, representing *)
(* a matrix of column widths; the matrix must be rectangular.  maxcols *)
(* takes such a matrix and reduces it to an int list indicating the *)
(* max width in each column of the matrix.  if the matrix has N *)
(* rows and M columns, the result of maxcols is a list of length M *)
let maxcols = (List.map maxwidth) & transpose

let splitter str =
  match !regexp with
    | Some r -> Pcre.split ~rex:!compiled ~max:(-1) str
    | None   -> split ~merge:!merge (get !isep) str

(* columnate files in argv according to widths in widths
   widths is a list of integer max column widths (such as returned by maxcols)
   argv is a list of filenames
 *)
let columnate widths argv =
  (* columnate (as above) one file (open on chan) *)
  let columnatechan chan =
    (* given list of strings (ie fields) widen each one and join into a
       string, trimming trailing spaces *)
    let widenfields fields =
      (* pad s out to n chars with spaces *)
      let widen n s =
	let len = String.length s in
	  assert (len <= n);
	  s ^ (String.make (n-len) ' ')
      in
	trimright ' ' (String.concat !osep (List.map2 widen widths fields))
    in 
      (* widen fields of a line and print result *)
    let widenprint line = print_endline (widenfields (splitter line))
    in 
      iterlines widenprint chan
  in
    List.iter (with_open_in_file columnatechan) argv

(* return widths of fields in line as list of ints *)
let linewidths len line =
  let fields = List.map String.length (splitter line) in
    match len with
      | Some n ->
	  if List.length fields <> n
	  then
	    let err = (sprintf "non-rectangular data (NOT all lines have %d fields)" n) in
	      raise (Invalid_argument err)
	  else fields
      | None -> fields

(* return an int list list of all "linewidths" in chan.  a *)
(* "linewidth" is an int list representing the widths of all the *)
(* fields in a given line; a list of such linewidths represents the *)
(* column widths of all the lines in a file (open on chan) *)
let getlinewidths chan =
  let widths1 = linewidths None (input_line chan) in
  let n = List.length widths1 in
    widths1::(maplines (linewidths (Some n)) chan)

(* get max widths of columns in file open on chan as list of ints *)
let getcolwidths = maxcols & getlinewidths 

(* get max widths across all named files *)
let getfilewidths argv = maxcols (List.map (with_open_in_file getcolwidths) argv)

(* check command line options *)
let checkopts () =
  match !regexp with
    | Some r ->
	if !merge then raise (Arg.Bad "-m and -r are incompatible");
	if !sepopts > 1 then raise (Arg.Bad "-r is incompatible with -i and -s");
	compiled := Pcre.regexp r;
	()
    | None ->
	()

(* main routine: handle cmdline options and args, and columnate *)
let main () =
  let collect name = names := !names @ [name] in
  let msg = (usage (basename Sys.argv.(0))) in
  let _ = Arg.parse speclist collect msg in
  let argv = (tmpstdin (if !names = [] then ["-"] else !names)) in
    checkopts ();
    columnate (getfilewidths argv) argv

let _ = main ()

This document was generated using caml2html