url.ml
1: (*
2: * url -- command line url builder
3: * the below url can be generated via:
4: *
5: * url www keith/
6: *
7: * at my domain (lib.uchicago.edu); equivalent commands are:
8: *
9: * url www.lib keith
10: * url www.lib.uchicago.edu keith
11: * url http www.lib.uchicago.edu keith
12: * url http www.lib.uchicago.edu /keith
13: *
14: * an arg after the path is consider a query part; multiple such args
15: * are taken pairwise as typical CGI GET variable=value parameters
16: * (don't give = or &, they will be provided)
17: *
18: * everything is url-encoded appropriately
19: *
20: * prerequisites: ocurl (OCaml bindings to libcurl) and libcurl
21: *
22: * various ways to build; assume ocurl is in $OCURL (or available via findlib)
23: *
24: * ocamlc -o url -I $OCURL str.cma cgi.ml curl.cma unix.cma url.ml
25: * ocamake -o url -I $OCURL *.ml unix.cma str.cma curl.cma
26: * ocamake -opt -o url -I $OCURL *.ml unix.cmxa str.cmxa curl.cmxa
27: * ocamlfind ocamlc -o url -package str,unix,curl -linkpkg cgi.cmo url.cmo
28: *
29: * Keith Waclena <http://www.lib.uchicago.edu/keith/>
30: *)
31:
32: exception Usage of string (* usage errors *)
33:
34: (* usage message *)
35: let usage = Printf.sprintf "Usage: %s [scheme] hostname[:port] [path [var value] ...]" (Filename.basename Sys.argv.(0))
36:
37: let check = ref false (* check url *)
38: let fetch = ref false (* fetch url to stdout *)
39: let redirect = ref false (* chase redirects *)
40: let args = ref []
41:
42: let pairmap f (a,b) = f a, f b
43:
44: (* split string str on regexp pat *)
45: let split pat str = Str.split_delim (Str.regexp pat) str
46:
47: (* nand's [format::j]: join elements of list on string sep but only if non-blank
48: * bug: right now "blank" is defined as just "" rather than nand's "whitespace"
49: *)
50: let rec j sep = function
51: | [] -> ""
52: | hd::[] -> hd
53: | hd::tl -> let rest = j sep tl in
54: match hd,rest with
55: | "","" -> ""
56: | "",_ -> rest
57: | _,"" -> hd
58: | _ -> hd^sep^rest
59:
60: (* given two lists, return true if pat is a list prefix of subj, false otherwise *)
61: let rec prefix pat subj =
62: if pat = []
63: then true
64: else if subj = []
65: then false
66: else if List.hd pat = List.hd subj
67: then prefix (List.tl pat) (List.tl subj)
68: else false
69:
70: (* return "most expansive" version of hostname, e.g. at my domain:
71: * resolve "www" = "www.lib.uchicago.edu"
72: * if hostname doesn't exist, just return it as is
73: *)
74: let resolve hostname =
75: (* get dns host entry for hostname, or fake one up *)
76: let he =
77: try Unix.gethostbyname hostname
78: with Not_found -> {
79: Unix.h_name = hostname; Unix.h_aliases = [||];
80: Unix.h_addrtype = Unix.PF_UNIX; Unix.h_addr_list = [||]
81: }
82: in
83: let names = he.Unix.h_name::(Array.to_list he.Unix.h_aliases) (* combine hostname and cnames *)
84: in
85: if List.exists (fun elt -> elt = hostname) names
86: then hostname (* prefer exact match *)
87: else
88: let hostnamematch elt = prefix (split "\\." hostname) (split "\\." elt) in
89: let hits = List.filter hostnamematch names in
90: if hits = []
91: then hostname (* no prefix matches; fall back to original *)
92: else
93: (* sort comparison by "reverse domain name length" *)
94: let bylength a b =
95: (* all this splitting should be hoisted up *)
96: let la, lb = List.length (split "\\." a), List.length (split "\\." b) in
97: compare lb la
98: in
99: (* hostname is a prefix of some of the host's names; sort and pick the longest *)
100: List.hd (List.sort bylength hits)
101:
102: let checkurl url =
103: Curl.global_init Curl.CURLINIT_GLOBALALL;
104: try
105: let writer data = () in
106: let connection = Curl.init () in
107: Curl.set_url connection url;
108: Curl.set_writefunction connection writer;
109: Curl.set_followlocation connection !redirect;
110: Curl.perform connection;
111: let code = (Curl.get_httpcode connection) in
112: if 300 <= code && code < 400 (* redirect *)
113: then
114: (* you would think that (Curl.get_effectiveurl connection)
115: would return the value of the Location: header here, but no... *)
116: Printf.printf "%d\t%s\t%s\n" code url "NYI"
117: else Printf.printf "%d\t%s\n" code url
118: with
119: | Curl.CurlException (reason, code, str) ->
120: Printf.fprintf stderr "Error: %s\n" str; exit 1
121:
122: let fetchurl url =
123: Curl.global_init Curl.CURLINIT_GLOBALALL;
124: try
125: let connection = Curl.init () in
126: Curl.set_url connection url;
127: Curl.set_followlocation connection !redirect;
128: Curl.perform connection
129: with
130: | Curl.CurlException (reason, code, str) ->
131: Printf.fprintf stderr "Error: %s\n" str; exit 1
132:
133: (* turn ["a";"b";"c";"d"] representing url var/value pairs into "a=b&c=d"
134: * where everything is properly url-encoded
135: *)
136: let doquery list =
137: let rec loop = function
138: | var::value::rest -> (Printf.sprintf "%s=%s" (Cgi.encode var) (Cgi.encode value))::(loop rest)
139: | var::[] -> [Cgi.encode var]
140: | [] -> []
141: in
142: String.concat "&" (loop list)
143:
144: let construct args =
145: let construct' scheme hostport path query =
146: let host, port = (* encode host and port *)
147: pairmap Cgi.encode (
148: match split ":" hostport with
149: | [] -> hostport, ""
150: | [h] -> h, ""
151: | [h;p] -> h, p
152: | _ -> hostport, ""
153: )
154: in
155: let resolved = j ":" [(resolve host); port] in (* reassemble *)
156: let path =
157: Str.global_replace (Str.regexp "^/+") "/" ("/" ^ path) (* assure leading "/" *)
158: in
159: j "?" [Printf.sprintf "%s://%s%s" scheme resolved path; doquery query]
160: in
161: match args with
162: | [] -> failwith "can't happen"
163: | scheme::[] -> raise (Usage "hostname[:port] required")
164: | scheme::hostport::[] -> construct' scheme hostport "/" []
165: | scheme::hostport::path::query -> construct' scheme hostport path query
166:
167: let speclist = [
168: ("-c", Arg.Unit (fun () -> check := true), ": check url");
169: ("-f", Arg.Unit (fun () -> fetch := true), ": fetch url to stdout");
170: ("-r", Arg.Unit (fun () -> redirect := true), ": chase redirects");
171: ("--", Arg.Rest (fun arg -> args := !args @ [arg]),
172: ": stop interpreting options")
173: ]
174:
175: (* main routine: handle cmdline options and args *)
176: let main () =
177: let collect arg = args := !args @ [arg] in
178: let msg = usage in
179: let _ = Arg.parse speclist collect msg in
180: let hd, tl = try List.hd !args, List.tl !args with Failure _ -> "http", [] in
181: let url =
182: if List.mem hd ["http"; "https"; "http:"; "https:"] (* if scheme provided ... *)
183: then
184: let scheme = Str.global_replace (Str.regexp ":$") "" hd (* strip trailing ":" *)
185: in construct (scheme::tl)
186: else
187: construct ("http"::!args)
188: in
189: try
190: match !check, !fetch with
191: (* -c -f *)
192: | true, true -> raise (Usage "-c and -f are mutually exclusive")
193: | true, false -> checkurl url
194: | false, true -> fetchurl url
195: | false, false -> print_endline url
196: with Usage err ->
197: prerr_endline err; prerr_endline usage; exit 1
198:
199: let _ = main ()
200:
This document was generated using caml2html