url.ml

1: (* 2: * url -- command line url builder 3: * the below url can be generated via: 4: * 5: * url www keith/ 6: * 7: * at my domain (lib.uchicago.edu); equivalent commands are: 8: * 9: * url www.lib keith 10: * url www.lib.uchicago.edu keith 11: * url http www.lib.uchicago.edu keith 12: * url http www.lib.uchicago.edu /keith 13: * 14: * an arg after the path is consider a query part; multiple such args 15: * are taken pairwise as typical CGI GET variable=value parameters 16: * (don't give = or &, they will be provided) 17: * 18: * everything is url-encoded appropriately 19: * 20: * prerequisites: ocurl (OCaml bindings to libcurl) and libcurl 21: * 22: * various ways to build; assume ocurl is in $OCURL (or available via findlib) 23: * 24: * ocamlc -o url -I $OCURL str.cma cgi.ml curl.cma unix.cma url.ml 25: * ocamake -o url -I $OCURL *.ml unix.cma str.cma curl.cma 26: * ocamake -opt -o url -I $OCURL *.ml unix.cmxa str.cmxa curl.cmxa 27: * ocamlfind ocamlc -o url -package str,unix,curl -linkpkg cgi.cmo url.cmo 28: * 29: * Keith Waclena <http://www.lib.uchicago.edu/keith/> 30: *) 31: 32: exception Usage of string (* usage errors *) 33: 34: (* usage message *) 35: let usage = Printf.sprintf "Usage: %s [scheme] hostname[:port] [path [var value] ...]" (Filename.basename Sys.argv.(0)) 36: 37: let check = ref false (* check url *) 38: let fetch = ref false (* fetch url to stdout *) 39: let redirect = ref false (* chase redirects *) 40: let args = ref [] 41: 42: let pairmap f (a,b) = f a, f b 43: 44: (* split string str on regexp pat *) 45: let split pat str = Str.split_delim (Str.regexp pat) str 46: 47: (* nand's [format::j]: join elements of list on string sep but only if non-blank 48: * bug: right now "blank" is defined as just "" rather than nand's "whitespace" 49: *) 50: let rec j sep = function 51: | [] -> "" 52: | hd::[] -> hd 53: | hd::tl -> let rest = j sep tl in 54: match hd,rest with 55: | "","" -> "" 56: | "",_ -> rest 57: | _,"" -> hd 58: | _ -> hd^sep^rest 59: 60: (* given two lists, return true if pat is a list prefix of subj, false otherwise *) 61: let rec prefix pat subj = 62: if pat = [] 63: then true 64: else if subj = [] 65: then false 66: else if List.hd pat = List.hd subj 67: then prefix (List.tl pat) (List.tl subj) 68: else false 69: 70: (* return "most expansive" version of hostname, e.g. at my domain: 71: * resolve "www" = "www.lib.uchicago.edu" 72: * if hostname doesn't exist, just return it as is 73: *) 74: let resolve hostname = 75: (* get dns host entry for hostname, or fake one up *) 76: let he = 77: try Unix.gethostbyname hostname 78: with Not_found -> { 79: Unix.h_name = hostname; Unix.h_aliases = [||]; 80: Unix.h_addrtype = Unix.PF_UNIX; Unix.h_addr_list = [||] 81: } 82: in 83: let names = he.Unix.h_name::(Array.to_list he.Unix.h_aliases) (* combine hostname and cnames *) 84: in 85: if List.exists (fun elt -> elt = hostname) names 86: then hostname (* prefer exact match *) 87: else 88: let hostnamematch elt = prefix (split "\\." hostname) (split "\\." elt) in 89: let hits = List.filter hostnamematch names in 90: if hits = [] 91: then hostname (* no prefix matches; fall back to original *) 92: else 93: (* sort comparison by "reverse domain name length" *) 94: let bylength a b = 95: (* all this splitting should be hoisted up *) 96: let la, lb = List.length (split "\\." a), List.length (split "\\." b) in 97: compare lb la 98: in 99: (* hostname is a prefix of some of the host's names; sort and pick the longest *) 100: List.hd (List.sort bylength hits) 101: 102: let checkurl url = 103: Curl.global_init Curl.CURLINIT_GLOBALALL; 104: try 105: let writer data = () in 106: let connection = Curl.init () in 107: Curl.set_url connection url; 108: Curl.set_writefunction connection writer; 109: Curl.set_followlocation connection !redirect; 110: Curl.perform connection; 111: let code = (Curl.get_httpcode connection) in 112: if 300 <= code && code < 400 (* redirect *) 113: then 114: (* you would think that (Curl.get_effectiveurl connection) 115: would return the value of the Location: header here, but no... *) 116: Printf.printf "%d\t%s\t%s\n" code url "NYI" 117: else Printf.printf "%d\t%s\n" code url 118: with 119: | Curl.CurlException (reason, code, str) -> 120: Printf.fprintf stderr "Error: %s\n" str; exit 1 121: 122: let fetchurl url = 123: Curl.global_init Curl.CURLINIT_GLOBALALL; 124: try 125: let connection = Curl.init () in 126: Curl.set_url connection url; 127: Curl.set_followlocation connection !redirect; 128: Curl.perform connection 129: with 130: | Curl.CurlException (reason, code, str) -> 131: Printf.fprintf stderr "Error: %s\n" str; exit 1 132: 133: (* turn ["a";"b";"c";"d"] representing url var/value pairs into "a=b&c=d" 134: * where everything is properly url-encoded 135: *) 136: let doquery list = 137: let rec loop = function 138: | var::value::rest -> (Printf.sprintf "%s=%s" (Cgi.encode var) (Cgi.encode value))::(loop rest) 139: | var::[] -> [Cgi.encode var] 140: | [] -> [] 141: in 142: String.concat "&" (loop list) 143: 144: let construct args = 145: let construct' scheme hostport path query = 146: let host, port = (* encode host and port *) 147: pairmap Cgi.encode ( 148: match split ":" hostport with 149: | [] -> hostport, "" 150: | [h] -> h, "" 151: | [h;p] -> h, p 152: | _ -> hostport, "" 153: ) 154: in 155: let resolved = j ":" [(resolve host); port] in (* reassemble *) 156: let path = 157: Str.global_replace (Str.regexp "^/+") "/" ("/" ^ path) (* assure leading "/" *) 158: in 159: j "?" [Printf.sprintf "%s://%s%s" scheme resolved path; doquery query] 160: in 161: match args with 162: | [] -> failwith "can't happen" 163: | scheme::[] -> raise (Usage "hostname[:port] required") 164: | scheme::hostport::[] -> construct' scheme hostport "/" [] 165: | scheme::hostport::path::query -> construct' scheme hostport path query 166: 167: let speclist = [ 168: ("-c", Arg.Unit (fun () -> check := true), ": check url"); 169: ("-f", Arg.Unit (fun () -> fetch := true), ": fetch url to stdout"); 170: ("-r", Arg.Unit (fun () -> redirect := true), ": chase redirects"); 171: ("--", Arg.Rest (fun arg -> args := !args @ [arg]), 172: ": stop interpreting options") 173: ] 174: 175: (* main routine: handle cmdline options and args *) 176: let main () = 177: let collect arg = args := !args @ [arg] in 178: let msg = usage in 179: let _ = Arg.parse speclist collect msg in 180: let hd, tl = try List.hd !args, List.tl !args with Failure _ -> "http", [] in 181: let url = 182: if List.mem hd ["http"; "https"; "http:"; "https:"] (* if scheme provided ... *) 183: then 184: let scheme = Str.global_replace (Str.regexp ":$") "" hd (* strip trailing ":" *) 185: in construct (scheme::tl) 186: else 187: construct ("http"::!args) 188: in 189: try 190: match !check, !fetch with 191: (* -c -f *) 192: | true, true -> raise (Usage "-c and -f are mutually exclusive") 193: | true, false -> checkurl url 194: | false, true -> fetchurl url 195: | false, false -> print_endline url 196: with Usage err -> 197: prerr_endline err; prerr_endline usage; exit 1 198: 199: let _ = main () 200:

This document was generated using caml2html