ocolumn.ml

1: (* ocolumn: a version of *BSD's column(1) that doesn't dump core on long lines 2: Keith Waclena <http://www.lib.uchicago.edu/keith/> 3: *) 4: 5: open Utils 6: open Printf 7: 8: (* "for programs that run for a short time but allocate like crazy, *) 9: (* the default value of Gc.space_overhead is a little too low and causes *) 10: (* the garbage collector to work unnecessarily hard" *) 11: let _ = Gc.set { (Gc.get()) with Gc.space_overhead = 100 } 12: 13: (* print usage message *) 14: let usage = sprintf "Usage: %s [-i STR] [-m] [-o STR] [-r REGEXP] [-s STR] [-t] [--] file ..." 15: 16: (* cmdline parameters and related *) 17: 18: let isep = ref None (* input field separator *) 19: let merge = ref false (* merge adjacent empty fields *) 20: let osep = ref " " (* output field separator *) 21: let regexp = ref (Some "\t") (* regular expression split *) 22: let compiled = ref (Pcre.regexp "") (* compiled version of !regexp *) 23: let names = ref [] (* filenames *) 24: let sepopts = ref 0 25: 26: let get = function 27: | Some x -> x 28: | None -> raise Not_found 29: 30: (* speclist for Arg.parse cmdline parsing *) 31: let speclist = [ 32: ("-i", Arg.String (fun s -> isep := Some s; regexp := None; incr sepopts), 33: "STR: input field separator (default: '%s')"); 34: ("-m", Arg.Unit (fun () -> merge := true), 35: (sprintf ": merge adjacent empty fields (like awk; default: %s)" (if !merge then "true" else "false"))); 36: ("-o", Arg.String (fun s -> osep := s), 37: (sprintf "STR: output field separator (default: '%s')" (String.escaped !osep))); 38: ("-r", Arg.String (fun s -> regexp := Some s; isep := None; incr sepopts), 39: "REGEXP: field separator as regular expression"); 40: ("-s", Arg.String (fun s -> isep := Some s; osep := s; regexp := None; incr sepopts), 41: "STR: field separator (sets -i and -o)"); 42: ("-t", Arg.Unit (fun () -> ()), 43: ": noop for compatibility with BSD column"); 44: ("--", Arg.Rest (fun name -> names := !names @ [name]), 45: ": stop interpreting options"); 46: ] 47: 48: (* return maximum int in a list of ints *) 49: let maxwidth = List.fold_left max 0 50: 51: (* our most important data structure is an int list list, representing *) 52: (* a matrix of column widths; the matrix must be rectangular. maxcols *) 53: (* takes such a matrix and reduces it to an int list indicating the *) 54: (* max width in each column of the matrix. if the matrix has N *) 55: (* rows and M columns, the result of maxcols is a list of length M *) 56: let maxcols = (List.map maxwidth) & transpose 57: 58: let splitter str = 59: match !regexp with 60: | Some r -> Pcre.split ~rex:!compiled ~max:(-1) str 61: | None -> split ~merge:!merge (get !isep) str 62: 63: (* columnate files in argv according to widths in widths 64: widths is a list of integer max column widths (such as returned by maxcols) 65: argv is a list of filenames 66: *) 67: let columnate widths argv = 68: (* columnate (as above) one file (open on chan) *) 69: let columnatechan chan = 70: (* given list of strings (ie fields) widen each one and join into a 71: string, trimming trailing spaces *) 72: let widenfields fields = 73: (* pad s out to n chars with spaces *) 74: let widen n s = 75: let len = String.length s in 76: assert (len <= n); 77: s ^ (String.make (n-len) ' ') 78: in 79: trimright ' ' (String.concat !osep (List.map2 widen widths fields)) 80: in 81: (* widen fields of a line and print result *) 82: let widenprint line = print_endline (widenfields (splitter line)) 83: in 84: iterlines widenprint chan 85: in 86: List.iter (with_open_in_file columnatechan) argv 87: 88: (* return widths of fields in line as list of ints *) 89: let linewidths len line = 90: let fields = List.map String.length (splitter line) in 91: match len with 92: | Some n -> 93: if List.length fields <> n 94: then 95: let err = (sprintf "non-rectangular data (NOT all lines have %d fields)" n) in 96: raise (Invalid_argument err) 97: else fields 98: | None -> fields 99: 100: (* return an int list list of all "linewidths" in chan. a *) 101: (* "linewidth" is an int list representing the widths of all the *) 102: (* fields in a given line; a list of such linewidths represents the *) 103: (* column widths of all the lines in a file (open on chan) *) 104: let getlinewidths chan = 105: let widths1 = linewidths None (input_line chan) in 106: let n = List.length widths1 in 107: widths1::(maplines (linewidths (Some n)) chan) 108: 109: (* get max widths of columns in file open on chan as list of ints *) 110: let getcolwidths = maxcols & getlinewidths 111: 112: (* get max widths across all named files *) 113: let getfilewidths argv = maxcols (List.map (with_open_in_file getcolwidths) argv) 114: 115: (* check command line options *) 116: let checkopts () = 117: match !regexp with 118: | Some r -> 119: if !merge then raise (Arg.Bad "-m and -r are incompatible"); 120: if !sepopts > 1 then raise (Arg.Bad "-r is incompatible with -i and -s"); 121: compiled := Pcre.regexp r; 122: () 123: | None -> 124: () 125: 126: (* main routine: handle cmdline options and args, and columnate *) 127: let main () = 128: let collect name = names := !names @ [name] in 129: let msg = (usage (basename Sys.argv.(0))) in 130: let _ = Arg.parse speclist collect msg in 131: let argv = (tmpstdin (if !names = [] then ["-"] else !names)) in 132: checkopts (); 133: columnate (getfilewidths argv) argv 134: 135: let _ = main () 136:

This document was generated using caml2html