Module Prelude.String

This is OCaml's standard String with the additional functions from Strings and with Strings.Ops opened.

include module type of struct include Stdlib.String end
type t = string
val make : int -> char -> string
val init : int -> (int -> char) -> string
val empty : string
val of_bytes : bytes -> string
val to_bytes : string -> bytes
val length : string -> int
val get : string -> int -> char
val concat : string -> string list -> string
val cat : string -> string -> string
val equal : t -> t -> bool
val compare : t -> t -> int
val starts_with : prefix:string -> string -> bool
val ends_with : suffix:string -> string -> bool
val contains_from : string -> int -> char -> bool
val rcontains_from : string -> int -> char -> bool
val contains : string -> char -> bool
val sub : string -> int -> int -> string
val split_on_char : char -> string -> string list
val map : (char -> char) -> string -> string
val mapi : (int -> char -> char) -> string -> string
val fold_left : ('a -> char -> 'a) -> 'a -> string -> 'a
val fold_right : (char -> 'a -> 'a) -> string -> 'a -> 'a
val for_all : (char -> bool) -> string -> bool
val exists : (char -> bool) -> string -> bool
val escaped : string -> string
val uppercase_ascii : string -> string
val lowercase_ascii : string -> string
val capitalize_ascii : string -> string
val uncapitalize_ascii : string -> string
val index_from : string -> int -> char -> int
val index_from_opt : string -> int -> char -> int option
val rindex_from : string -> int -> char -> int
val rindex_from_opt : string -> int -> char -> int option
val index : string -> char -> int
val index_opt : string -> char -> int option
val rindex : string -> char -> int
val rindex_opt : string -> char -> int option
val to_seq : t -> char Stdlib.Seq.t
val to_seqi : t -> (int * char) Stdlib.Seq.t
val of_seq : char Stdlib.Seq.t -> t
val get_utf_8_uchar : t -> int -> Stdlib.Uchar.utf_decode
val is_valid_utf_8 : t -> bool
val get_utf_16be_uchar : t -> int -> Stdlib.Uchar.utf_decode
val is_valid_utf_16be : t -> bool
val get_utf_16le_uchar : t -> int -> Stdlib.Uchar.utf_decode
val is_valid_utf_16le : t -> bool
val create : int -> bytes
  • deprecated Use Bytes.create/BytesLabels.create instead.
val set : bytes -> int -> char -> unit
  • deprecated Use Bytes.set/BytesLabels.set instead.
val blit : string -> int -> bytes -> int -> int -> unit
val copy : string -> string
  • deprecated Strings now immutable: no need to copy
val fill : bytes -> int -> int -> char -> unit
  • deprecated Use Bytes.fill/BytesLabels.fill instead.
val uppercase : string -> string
  • deprecated Use String.uppercase_ascii/StringLabels.uppercase_ascii instead.
val lowercase : string -> string
  • deprecated Use String.lowercase_ascii/StringLabels.lowercase_ascii instead.
val capitalize : string -> string
  • deprecated Use String.capitalize_ascii/StringLabels.capitalize_ascii instead.
val uncapitalize : string -> string
  • deprecated Use String.uncapitalize_ascii/StringLabels.uncapitalize_ascii instead.
val get_uint8 : string -> int -> int
val get_int8 : string -> int -> int
val get_uint16_ne : string -> int -> int
val get_uint16_be : string -> int -> int
val get_uint16_le : string -> int -> int
val get_int16_ne : string -> int -> int
val get_int16_be : string -> int -> int
val get_int16_le : string -> int -> int
val get_int32_ne : string -> int -> int32
val get_int32_be : string -> int -> int32
val get_int32_le : string -> int -> int32
val get_int64_ne : string -> int -> int64
val get_int64_be : string -> int -> int64
val get_int64_le : string -> int -> int64
val unsafe_get : string -> int -> char
val unsafe_set : bytes -> int -> char -> unit
  • deprecated
val unsafe_blit : string -> int -> bytes -> int -> int -> unit
val unsafe_fill : bytes -> int -> int -> char -> unit
  • deprecated
include module type of struct include Strings end

Additional string functions.

Basic Functions

val len : string -> int

len = String.length.

val to_string : 'a -> 'a

(to_string s) = s.

val rev : string -> string

(rev str) is the reverse of str.

Invariant:

  • rev = (explode >> List.rev >> implode)
val random : ?printable:bool -> ?charset:char list -> ?size:(unit -> int) -> unit -> string

(random ?(printable=true) ?charset ?size ()) returns a random string of length (size ()) (default: < 64). Each of the characters in the string is the result of calling (Char.random ?printable ?charset ()).

Indexing and Slicing

slice and the infix operators (#.) and (#!) do string indexing and slicing modeled on Python. There is currently no support for "steps".

Negative indexes are as in Python. Indexes "missing" in Python's slice notation are replaced with 0.

Indexes that are out-of-range are treated as the max or min string index as appropriate, so no exceptions are ever raised, e.g.:

(slice "" (0,100)) = "" 

Analogues:

  • Python: "str"[i] = OCaml: "str"#!i
  • Python: "str"[i:j] = OCaml: "str"#.(i,j)
  • Python: "str"[:j] = OCaml: "str"#.(0,j)
  • Python: "str"[i:] = OCaml: "str"#.(i,0)
  • Python: "str"[:] = OCaml: "str"#.(0,0)
val slice : string -> (int * int) -> string

(slice str (l,u)) is Python string slicing.

Example: (slice "0123456789" (2,4)) = "23"

Predicates

val eq : string -> string -> bool

(eq a b) is type-specialized and inlined string equality.

val null : string -> bool

null is ((=) "").

val any : (char -> bool) -> string -> bool

(any p str) is true iff (p x) for any of the characters of str.

Invariant: (any p str) = (explode str |> map p |> ored)

val all : (char -> bool) -> string -> bool

(all p str) is true iff (p c) for all of the characters of str.

Invariant: (all p str) = (explode str |> map p |> anded)

val mem : char -> string -> bool

mem is (flip String.contains).

val only : these:string -> string -> bool

(only ~these str) is true iff str consists of only characters in these; tail recursive.

Partially apply for efficiency.

val anyof : these:string -> string -> bool

(anyof ~these str) is true iff str contains any of the characters in these; tail recursive.

Partially apply for efficiency.

val allof : these:string -> string -> bool

(allof ~these str) is true iff every character in these is contained in str; tail recursive.

Partially apply for efficiency.

val prefix : string -> string -> bool

(prefix s str) is true iff s is a prefix of str.

Invariants:

  • (prefix "" s) = true
  • (prefix s s) = true
val suffix : string -> string -> bool

(suffix s str) is true iff s is a suffix of str.

Invariants:

  • (suffix "" s) = true
  • (suffix s s) = true

Substrings

val substr : string -> string -> int option

(substr sub str) is (Some i) where i is the index of the first occurrence of sub in str, or None if sub does not occur in str.

Examples:

  • (substr "x" "") = None
  • (substr "x" "x") = Some 0
  • (substr "x" "xs") = Some 0
  • (substr "x" "zxs") = Some 1
  • (substr "x" "zs") = None
val take : int -> string -> string

(take n str) is the prefix of str of length n, or str itself if (n < 0 || n > len str).

val drop : int -> string -> string

(drop n str) is the suffix of str after the first n elements, or "" if (n < 0 || n > len str).

val splitat : int -> string -> string * string

(splitat n str) is (a,b) such that a is the (min n (len str))-byte prefix of str, and b is the remainder of the string.

N.B. if n > (len str) then (len @@ fst @@ splitat n str = len str).

(splitat n str) is equivalent to (take n str, drop n str).

val takeall : int -> string -> string list

(takeall n str) returns the sequential substrings of length n in str.

  • (takeall 3 digits) = ["012"; "345"; "678"; "9"]

Invariant: (takeall n str |> String.concat "") = str

  • raises Invalid_argument

    if n < 1

val takewhile : (char -> bool) -> string -> string

(takewhile p str) is the leading string of chars of str for which p is true.

  • (takewhile (contains digits) "7654abc123") = "7654"
  • (takewhile (contains digits) "x7654abc") = ""
val dropwhile : (char -> bool) -> string -> string

(dropwhile p str) is str trimmed of leading chars for which p is true.

  • (dropwhile (contains digits) "7654abc") = "abc"
  • (dropwhile (contains digits) "x7654abc") = "x7654abc"
val splitwhile : (char -> bool) -> string -> string * string

(splitwhile p str) is (takewhile p str, dropwhile p str).

Folding Over Strings

val foldl : ('a -> char -> 'a) -> 'a -> string -> 'a

(foldl f acc s) is a left-fold over the chars in a string; (f (...(f (f acc s.[0]) s.[1])...) s.[m]) with m = String.length s - 1.

Tail-recursive.

val foldr : (char -> 'a -> 'a) -> 'a -> string -> 'a

(foldr f acc s) is a right-fold over the chars in a string; (f s.[0] (f s.[1] (...(f s.[m] acc) )...)) with m = String.length s - 1.

Not tail-recursive.

val foldlines : ?start:int -> ?sep:char -> (int -> 'a -> string -> 'a) -> 'a -> string -> 'a

(foldlines ?start ?sep f init str) folds (f i acc line) over the implied lines in str, with init as the initial value for acc.

Compared to doing (String.split str |> List.foldl), String.foldlines avoids allocations, and is typically 500-600% faster.

f's i parameter is the offset in str of the next line to be processed; the offset of the start of the line is always available as (i - String.len line).

It can be useful to return i if you are exiting the fold early with Exn.return; e.g., foldlines could then be restarted where you left off by passing ~start:i.

?sep is the line-separator (default: '\n'); you can fold over other, non-line, chunks if you like.

Example:

foldlines (fun _i -> (snocwhen (String.len >> gt 200))) [] str

is the list of lines more than 200 bytes long in str.

val maps : (char -> string) -> string -> string

(maps f str) is like String.map except f returns a string instead of a char.

val filter : (char -> bool) -> string -> string

(filter p str) is all the characters of the string str that satisfy the predicate p.

The order of the characters in the input string is preserved.

  • author Matt Teichman

Iterating Over Strings

val iter : (char -> unit) -> string -> unit

(iter f str) calls (f c) for each character c in str (for side-effects).

val iteri : (int -> char -> unit) -> string -> unit

(iteri f str) calls (f i c) for each character c and zero-based index i in str (for side-effects).

Strings and Character Lists

val explode : string -> char list

(explode s) is the list of characters comprising the string s.

Example: explode "abc" = ['a'; 'b'; 'c']

val to_list : string -> char list

to_list is explode.

val implode : char list -> string

implode is the inverse of explode.

Example: (explode >> implode) "abc" = "abc"

val of_list : char list -> string

of_list is implode.

Strings and Character Arrays

val of_array : char array -> string

(of_array a) is the string consisting of all the characters of a.

val to_array : string -> char array

(to_array s) is the array consisting of all the characters of s.

Predefined Strings

All the characters in these predefined strings occur in lexicographic order.

val whitespace : string

whitespace is a string consisting of the ASCII whitespace characters:

"\t\n\x0B\x0C\r "

val digits : string

digits is a string consisting of the ASCII decimal digits:

"0123456789"

val alphabet : string

alphabet is a string consisting of the lowercase ASCII alphabetic characters:

"abcdefghijklmnopqrstuvwxyz"

val miniscules : string

miniscules is alphabet.

val majuscules : string

majuscules is (uppercase_ascii alphabet):

"ABCDEFGHIJKLMNOPQRSTUVWXYZ"

Splitting and Joining

val split : ?elide:bool -> ?complement:bool -> ?sep:string -> string -> string list

(split ?elide ?complement ?sep str) is the list of (possibly empty) substrings that are delimited by any of the set of chars contained in the string sep (default: whitespace). If complement is true (default: false), the separator characters are any that are not contained in sep. Empty substrings are omitted in the list if elide is true (the default).

The sep string is treated as US-ASCII.

(split "foo \t bar\nbaz") = ["foo"; "bar"; "baz"] 
(split ~complement:true ~sep:String.(miniscules ^ majuscules) "foo BAR1765 isn't") = ["foo"; "BAR"; "isn"; "t"] 

(split ~elide:false ~sep str) is the parser for lines in simple delimited file formats such as /etc/passwd's :-delimited format, tab-delimited formats, etc.

(split ~elide:false ~sep:":" ":foo:bar::baz:") = [""; "foo"; "bar"; ""; "baz"; ""] 
(split ~elide:true  ~sep:":" ":foo:bar::baz:") = ["foo"; "bar"; "baz"] 
val join : ?elide:bool -> ?sep:string -> string list -> string

(join ?(elide=false) ?sep list) is String.concat sep; the default sep is " ".

If (elide = true), empty strings in list are ignored (requires an additional pass).

val cut : sep:string -> string -> string * string option

(cut ~sep str) divides str into two parts (left, Some right) which are delimited by the leftmost occurrence of sep. If there is no occurrence of sep in str, then the result is (str, None).

Examples:

  • (cut ~sep:"--" "") = ("", None)
  • (cut ~sep:"--" "x") = ("x",None)
  • (cut ~sep:"--" "x--") = ("x",Some "")
  • (cut ~sep:"--" "x--y") = ("x",Some "y")
  • raises Invalid_argument

    if ~sep is "".

val cuts : sep:string -> string -> string list

cuts ~sep str is the list of substrings of str that are delimited by occurrences of sep.

Note the differences between these:

  • (cuts ~sep:"--" "1--2---3") = ["1"; "2"; "-3"]
  • (split ~elide:false ~sep:"--" "1--2---3") = ["1"; ""; "2"; ""; ""; "3"]
  • (split ~elide:false ~sep:"-" "1--2---3") = ["1"; ""; "2"; ""; ""; "3"]
  • (split ~elide:true ~sep:"--" "1--2---3") = ["1"; "2"; "3"]
  • (split ~elide:true ~sep:"-" "1--2---3") = ["1"; "2"; "3"]
  • raises Invalid_argument

    if ~sep is "".

Modifying and Formatting Strings

val replace : string -> string -> string -> string

(replace subj rep str) replaces all occurrences of subj in str with rep.

(replace subj rep) = (cuts ~sep:subj >> concat rep)
  • raises Invalid_argument

    if subj is "".

val pad : ?left:bool -> ?def:char -> int -> string -> string

(pad ?(left=false) ?(def=' ') n str) pads str on the right (or on the left, if (left = true)) with enough instances of def such that the length of the result is at least n.

If you need the resulting string to be exactly of length n (never longer), use:

  • (take n $ pad ~def n)

Invariant: (len (pad ~left ~def n str) = max n (len str))

val translate : string -> string -> string -> string

(translate xs ys str) transliterates the characters in str which appear in xs to the corresponding characters in ys.

If (len ys < len xs), ys will be padded on the right with ys#!(-1). For example:

  • (translate majuscules "!" "FooBar" = "!oo!ar")

If (xs = ""), the returned value is str. Raises Failure if (len ys = 0 || len xs < len ys).

Example:

  • (translate miniscules majuscules "foobar") = "FOOBAR"
val prepend : string -> string -> string

(prepend prefix str) is (prefix ^ str).

val postpend : string -> string -> string

(postpend suffix str) is (str ^ suffix).

val append : string -> string -> string
  • deprecated

    Poor name choice; use postpend.

val trimleft : string -> string -> string

(trimleft cs) is (dropwhile (contains cs)).

val trimright : string -> string -> string

(trimright cs) is (rev >> trimleft cs >> rev).

val trim : string -> string -> string

(trim cs) is (trimleft cs >> trimright cs).

val commas : ?comma:char -> string -> string

(commas ?comma num) formats the (presumed) numeric string num with commas in the conventional manner.

For example, (commas "3628800") = "3,628,800".

All OCaml integer and float literals are accepted, but only decimal integers are commafied; hex, octal, and binary integers, and all floating point numbers are returned unmodified.

Leading zeros, a leading '+'-sign, and all underscore spacers are elided in the commafied number.

Other non-numeric strings will have commas inserted into them, but you shouldn't rely on this behavior due to all the special cases above.

val plural : ?reg:(string -> string) -> ?irr:string -> int -> string -> string

(plural ?reg ?irr n word) returns the possibly plural form of the singular word in the context of the number n.

Use ~irr to provide a fixed irregular plural form. (plural ~irr:x) is equivalent to (plural ~reg:(k x)).

~reg is a function that returns the regular plural of a word; the default is for English and is (postpend "s"); see also es.

Pluralization is done as for English, viz. the singular is used for 1 and all other numbers, including 0 and negative numbers, use the plural form.

Examples:

  • (List.map (id *** flip plural "dog") [-1;0;1;2]) = [(-1, "dogs"); (0, "dogs"); (1, "dog"); (2, "dogs")]
  • (List.map (id *** flip (plural ~irr:"oxen") "ox") [-1;0;1;2]) = [(-1, "oxen"); (0, "oxen"); (1, "ox"); (2, "oxen")]
val es : string -> string

(es w) is (postpend "es" w); it is an alternative pluralization function for certain regularly irregular English nouns.

Example:

  • (List.map (id *** flip (plural ~reg:es) "wrass") [-1;0;1;2]) = [(-1, "wrasses"); (0, "wrasses"); (1, "wrass"); (2, "wrasses")]
val ocaml : string -> string

(ocaml str) converts its argument to OCaml syntax, wrapping it in double quotes and adding escapes as necessary.

val parens : (char * char) -> char Stream.t -> [> `P of 'a list | `S of string ] as 'a list

(parens (l,r) s) parses the string on Stream s into a forest, recognizing l and r as left and right parentheses respectively.

The forest is a list of nodes; each node is either (`S s) where s is a string, or (`P f) where f is the (sub)forest representing a parenthesized expression.

If the parsed string contains an unterminated parenthesized expression, Failure is raised.

Example:

  • (Stream.of_string "foo" |> parens ('(',')')) = [`S "foo"]
  • (Stream.of_string "(foo)" |> parens ('(',')')) = [`P [`S "foo"]]
  • (Stream.of_string "a(foo)z" |> parens ('(',')')) = [`S "a"; `P [`S "foo"]; `S "z"]
  • (Stream.of_string "a(foo(bar))z" |> parens ('(',')')) = [`S "a"; `P [`S "foo"; `P [`S "bar"]]; `S "z"]

Unmatched right parens are allowed. Example:

  • (Stream.of_string "1) one" |> parens ('(',')')) = [`S "1) one"]
val string_of_parens : (char * char) -> [< `P of 'a | `S of string ] list as 'a -> string

(string_of_parens (l,r)) converts the parsed forest returned by (parens (l,r) s) into a string.

(string_of_parens (l,r)) is the inverse of (Stream.of_string >> parens (l,r)).

val seq_of_fields : sep:string -> string -> string Seq.t

(seq_of_fields ~sep str) is the sequence of sep-separated fields in str.

sep is as for cut.

val seq_of_lines : string -> string Seq.t

(seq_of_lines str) is the sequence of "\n"-separated lines in str.

Ops

module Ops = Strings.Ops

Infix and prefix operators.

include module type of struct include Strings.Ops end

Infix and prefix operators.

val (#!) : string -> int -> char

(#!) is (#!).

val (#.) : string -> (int * int) -> string

(#.) is (#.).

val (--) : char -> char -> char list

(--) is (--).