From: "Vladimir N. Silyaev" <vsilyaev@mindspring.com>
To: caml-list@inria.fr
Subject: [Caml-list] Re: Common IO structure
Date: Sun, 2 May 2004 23:12:12 -0700 [thread overview]
Message-ID: <20040503061212.GA64216@server.vns.oc.ca.us> (raw)
[-- Attachment #1: Type: text/plain, Size: 1402 bytes --]
I'm relatively new to ocaml and very new to this list. Browsing thru recent
discussion about standard IO structure, I felt that discussion was cycled
over what signature of the "universal" class should look like, so it would
satisfy all possible needs.
However looks like prospective users, of this new IO, have rather
contradictive requirements to the IO in general. And I'm thinking could
standard IO only to provide basic signature of the IO modules and framework
for layered IO. Doing so additional functionality could be added
incrementally, without affection core IO and achieving interoperability
between different libraries which are using that IO.
Last day or two I was playing with ocaml and ocaml module system, and
sketched some variant of basic IO. This sketch based on imperative
streams, where stream is module parameterized by the symbol type. However
blocked based IO is also supported, test code includes naive functors to
translate from a block based IO to a stream IO.
If you felt interested, please look into the attached file io.ml.
In the file you would found wrappers for Pervasive file IO, rudimentary
socket I/O, naive UTF8 filter and generic I/O algorithms. At the end
of file there are several test cases to exercise I/O extensibility.
File is self sufficient example and one should be able to compile it or run
in the toplevel.
Regards,
Vladimir
[-- Attachment #2: io.ml --]
[-- Type: text/plain, Size: 10705 bytes --]
module Stream = struct
module type Read = sig
type t
type symbol
val get: t -> symbol
end
module type Write = sig
type t
type symbol
val put: t -> symbol -> unit
end
end
module Block =
struct
module type Read =
sig
type t
val read: t -> string -> int -> int -> int
end
module type Write =
sig
type t
val write: t -> string -> int -> int -> unit
end
end
module Filter = struct
module type Read = sig
include Stream.Read
type source
val flush: t -> unit
val attach: source -> t
end
module type Write = sig
include Stream.Write
type dest
val flush: t -> unit
val attach: dest -> t
end
end
module File = struct
module Read = struct
type file = Pervasives.in_channel
type t = file
let _open name :t = Pervasives.open_in_bin name
let close (t:t) = Pervasives.close_in t
let seek = seek_in
let pos = pos_in
type symbol = char
let get (t:t) = Pervasives.input_char t
let read (t:t) ic buf pos = Pervasives.input t ic buf pos
end
module Write = struct
type file = Pervasives.out_channel
type t = file
let _open name :t = Pervasives.open_out_bin name
let close (t:t) = Pervasives.close_out t
let seek = seek_out
let pos = pos_out
type symbol = char
let put (t:t) ch = Pervasives.output_char t ch
let write = Pervasives.output
end
end
module Buffer = struct
module Read(B:Block.Read) : (Filter.Read with type symbol=char and type source=B.t) = struct
type t = {
buf : String.t;
mutable pos : int;
mutable level: int;
source: B.t
}
type source = B.t
type symbol = char
let attach b =
let blen = 1024 in
{
buf = String.create blen;
pos = 0;
level = 0;
source = b
}
let get t =
if t.pos = t.level then begin
match B.read t.source t.buf 0 (String.length t.buf) with
0 -> raise End_of_file
| n ->
t.pos <- 0;
t.level <- n
end;
let ch = t.buf.[t.pos] in
t.pos <- t.pos + 1;
ch
let flush t =
t.pos <- 0;
t.level <- 0
end
module Write(B:Block.Write) : (Filter.Write with type symbol=char and type dest=B.t) = struct
type dest = B.t
type symbol = char
type t = {
buf : String.t;
mutable pos : int;
dest: B.t
}
let attach t = {
buf = String.create 256;
pos = 0;
dest = t;
}
let flush t =
B.write t.dest t.buf 0 t.pos;
t.pos <- 0
let put t ch =
t.buf.[t.pos] <- ch;
t.pos <- t.pos + 1;
if t.pos >= String.length t.buf then flush t
end
end
module Socket = struct
type sock = Unix.file_descr
type t = sock
let create ?(domain=Unix.PF_INET) ?(protocol=0) _type :t = Unix.socket domain _type protocol
let close = Unix.close
module Read = struct
type t = sock
let read = Unix.read
let shutdown s = Unix.shutdown s Unix.SHUTDOWN_RECEIVE
end
module Write = struct
type t = sock
let rec write t buf off len =
match Unix.write t buf off len with
n when n=len -> ()
| n -> write t buf (off+n) (len-n)
let shutdown s = Unix.shutdown s Unix.SHUTDOWN_SEND
end
let connect ?sock addr :(Read.t*Write.t) =
let s = match sock with
Some sock -> sock
| None -> create Unix.SOCK_STREAM in
Unix.connect s addr;
(s,s)
end
module UTF8 = struct
exception InvalidSymbol
type utf8 = int
type t = utf8
module Read(Src:Stream.Read with type symbol=char) : (Filter.Read with type symbol=utf8 and type source=Src.t) = struct
type symbol = utf8
type t = Src.t
type source = Src.t
let attach (t:Src.t) : t = t
let flush t = ()
let get t =
let next t =
let ch = int_of_char (Src.get t) in
if (ch land 0xC0) = 0x80 then ch land 0x3F else raise InvalidSymbol in
let ch0 = int_of_char (Src.get t) in
if ch0 < 0x80 then ch0
else if ch0 < 0xE0 then
let ch1 = next t in
let ch = ((ch0 land 0x1F) lsl 6) lor ch1 in
if ch < 0x80 then raise InvalidSymbol
else ch
else if ch0 < 0xF0 then
let ch1 = next t in
let ch2 = next t in
let ch = ((ch0 land 0x0F) lsl 12) lor (ch1 lsl 6) lor ch2 in
if ch < 0x800 then raise InvalidSymbol
else ch
else if ch0 < 0xF8 then
let ch1 = next t in
let ch2 = next t in
let ch3 = next t in
let ch = ((ch0 land 0x03) lsl 18) lor (ch1 lsl 12) lor (ch2 lsl 6) lor ch3 in
if ch < 0x10000 then raise InvalidSymbol
else ch
else raise InvalidSymbol
end
module Write(Dst:Stream.Write with type symbol=char): (Filter.Write with type symbol=utf8 and type dest=Dst.t) = struct
type symbol = utf8
type dest = Dst.t
type t = Dst.t
let attach t = t
let flush t = ()
let put t ch =
if ch < 0x80 then Dst.put t (char_of_int ch)
else if ch < 0x800 then begin
Dst.put t (char_of_int (0xC0 lor (ch lsr 6)));
Dst.put t (char_of_int (0x80 lor (ch land 0x3F)))
end else if ch < 0x10000 then begin
Dst.put t (char_of_int (0xE0 lor (ch lsr 12)));
Dst.put t (char_of_int (0x80 lor ((ch lsr 6) land 0x3F)));
Dst.put t (char_of_int (0x80 lor (ch land 0x3F)))
end else if ch < 0x110000 then begin
Dst.put t (char_of_int (0xF0 lor (ch lsr 18)));
Dst.put t (char_of_int (0x80 lor ((ch lsr 12) land 0x3F)));
Dst.put t (char_of_int (0x80 lor ((ch lsr 6) land 0x3F)));
Dst.put t (char_of_int (0x80 lor (ch land 0x3F)))
end else raise InvalidSymbol
end
end
module type Type =
sig
type t
end
module Char = struct
type t = char
end
module Copy (T:Type) (Src:Stream.Read with type symbol=T.t) (Dst:Stream.Write with type symbol=T.t) = struct
let run s d =
while true do
Dst.put d (Src.get s)
done
end
let copy_byte src dst =
let module CopyFile = Copy(Char) (File.Read) (File.Write) in
let src = File.Read._open src
and dst = File.Write._open dst in
try CopyFile.run src dst
with End_of_file ->
File.Read.close src;
File.Write.close dst
module FileWriteUtf8 = UTF8.Write (File.Write)
let copy_utf src dst =
let module FileReadUtf8 = UTF8.Read (File.Read) in
let module CopyFile = Copy(UTF8) (FileReadUtf8) (FileWriteUtf8) in
let src = File.Read._open src
and dst = File.Write._open dst in
let src8 = FileReadUtf8.attach src
and dst8 = FileWriteUtf8.attach dst
in
try CopyFile.run src8 dst8
with End_of_file ->
File.Read.close src;
File.Write.close dst
module BufferedFileRead = Buffer.Read(File.Read)
module BufferedFileReadUtf8 = UTF8.Read(BufferedFileRead)
module FileReadUtf8 = UTF8.Read(File.Read)
let copy_utf2 src dst =
let module CopyFile = Copy (UTF8) (BufferedFileReadUtf8) (FileWriteUtf8) in
let src = File.Read._open src
and dst = File.Write._open dst in
let srcb = BufferedFileRead.attach src in
let src8 = BufferedFileReadUtf8.attach srcb
and dst8 = FileWriteUtf8.attach dst
in
try CopyFile.run src8 dst8
with End_of_file ->
File.Read.close src;
File.Write.close dst
module Utf2Ascii (Dst:Stream.Write with type symbol=char ) = struct
type symbol = UTF8.t
type t = Dst.t
type dest = Dst.t
let attach (t:Dst.t) :t = t
let put t ch =
match ch with
ch when ch < 0x80 -> Dst.put t (char_of_int ch)
| _ -> Dst.put t ' '
end
let copy_utf2ascii src dst =
let module FileWriteAscii = Utf2Ascii(File.Write) in
let module CopyFile = Copy (UTF8) (BufferedFileReadUtf8) (FileWriteAscii) in
let src = File.Read._open src
and dst = File.Write._open dst in
let srcb = BufferedFileRead.attach src in
let src8 = BufferedFileReadUtf8.attach srcb in
let dsta = FileWriteAscii.attach dst in
try CopyFile.run src8 dsta
with End_of_file ->
File.Read.close src;
File.Write.close dst
module Static = struct
type symbol = char
type t = {
mutable pos: int;
mutable buf: String.t list;
mutable cur: String.t
}
let attach l = {
pos = 0;
buf = l;
cur = "";
}
let rec get t =
if t.pos < String.length t.cur then begin
let ch = t.cur.[t.pos] in
t.pos <- t.pos + 1;
ch
end else match t.buf with
hd::tl ->
t.buf <- tl;
t.pos <- 0;
t.cur <- hd;
get t
| [] -> raise End_of_file
end
let copy_static dst =
let module CopyFile = Copy (Char) (Static) (File.Write) in
let dst = File.Write._open dst
and src = Static.attach ["Hello";"\n";"World";"\n"] in
try CopyFile.run src dst
with End_of_file ->
File.Write.close dst
module StreamSockRead = Buffer.Read(Socket.Read)
module StreamSockWrite = Buffer.Write(Socket.Write)
let get_utf8 host url dst =
let addr = Unix.ADDR_INET((Unix.gethostbyname host).Unix.h_addr_list.(0), 80) in
let (sr,sw) = Socket.connect addr in
let req = Static.attach ["GET"; " ";url; " "; "HTTP/0.9";"\n";"\n"] in
let sock_wr = StreamSockWrite.attach sw
and sock_rd = StreamSockRead.attach sr in
let module SendReq = Copy (Char) (Static) (StreamSockWrite) in
try SendReq.run req sock_wr with
End_of_file ->
StreamSockWrite.flush sock_wr;
let dst = File.Write._open dst in
let module GetFile = Copy (Char) (StreamSockRead) (File.Write) in
try GetFile.run sock_rd dst with
End_of_file ->
Socket.close sr;
File.Write.close dst
module Length (T:Type) (Src:Stream.Read with type symbol=T.t) = struct
let get s =
let len = ref 0 in
try while true do
ignore (Src.get s);
incr len
done; -1
with End_of_file -> !len
end
let length file =
let src = File.Read._open file in
let module FileCharLen = Length (Char) (File.Read) in
let len = FileCharLen.get src in
File.Read.seek src 0;
let module FileUtf8Len = Length (UTF8) (FileReadUtf8) in
let usrc = FileReadUtf8.attach src in
let len8 = FileUtf8Len.get usrc in
File.Read.close src;
len,len8
let run () =
let html = "utf8.html" in
List.iter (
fun (desc,op,file) ->
print_endline desc;
op file;
let (bytes,symbols) = length file in
Printf.printf "Done. File '%s'; bytes %d, symbols %d\n" file bytes symbols
) [
"Hello World", copy_static, "static.dat";
"Getting corpse from a web ..", (get_utf8 "www.columbia.edu" "/kermit/utf8.html" ), html;
"Copying corpse using bytecopy", (copy_byte html), "copy.dat";
"Copying corpse using utf8 symbols", (copy_utf html), "copy_utf8.dat";
"Copying corpse using utf8 symbols and read buffer", (copy_utf2 html), "copy_utf8_2.dat";
"Converting corpse to ASCII", (copy_utf2ascii html), "ascii.dat";
]
let _ = run ()
next reply other threads:[~2004-05-03 6:12 UTC|newest]
Thread overview: 67+ messages / expand[flat|nested] mbox.gz Atom feed top
2004-05-03 6:12 Vladimir N. Silyaev [this message]
2004-05-04 21:31 ` Benjamin Geer
2004-05-04 22:59 ` Yamagata Yoriyuki
2004-05-05 8:11 ` skaller
2004-05-05 15:48 ` Marcin 'Qrczak' Kowalczyk
2004-05-05 19:28 ` skaller
2004-05-05 17:33 ` Vladimir N. Silyaev
2004-05-05 17:31 ` Vladimir N. Silyaev
2004-05-07 22:11 ` Benjamin Geer
2004-05-08 7:29 ` Vladimir N. Silyaev
2004-05-09 17:35 ` Benjamin Geer
-- strict thread matches above, loose matches on Subject: below --
2004-04-24 9:28 [Caml-list] [ANN] The Missing Library Nicolas Cannasse
2004-04-25 8:56 ` Common IO structure (was Re: [Caml-list] [ANN] The Missing Library) Yamagata Yoriyuki
2004-04-25 11:54 ` Gerd Stolpmann
2004-04-26 14:53 ` [Caml-list] Re: Common IO structure Yamagata Yoriyuki
2004-04-26 21:02 ` Gerd Stolpmann
2004-04-25 19:42 ` Common IO structure (was Re: [Caml-list] [ANN] The Missing Library) Nicolas Cannasse
2004-04-26 13:16 ` [Caml-list] Re: Common IO structure Yamagata Yoriyuki
2004-04-26 13:53 ` Jacques GARRIGUE
2004-04-26 14:26 ` Nicolas Cannasse
2004-04-28 6:52 ` Jacques GARRIGUE
2004-04-26 14:23 ` Nicolas Cannasse
2004-04-26 14:55 ` skaller
2004-04-26 15:26 ` Yamagata Yoriyuki
2004-04-26 19:28 ` Nicolas Cannasse
2004-04-26 20:56 ` Gerd Stolpmann
2004-04-26 21:14 ` John Goerzen
2004-04-26 22:32 ` Gerd Stolpmann
2004-04-26 21:52 ` Benjamin Geer
2004-04-27 16:00 ` Yamagata Yoriyuki
2004-04-27 21:51 ` Gerd Stolpmann
2004-04-27 19:08 ` Nicolas Cannasse
2004-04-27 22:22 ` Gerd Stolpmann
2004-04-28 7:42 ` Nicolas Cannasse
2004-04-29 10:13 ` Yamagata Yoriyuki
2004-04-27 15:43 ` Yamagata Yoriyuki
2004-04-27 16:17 ` Nicolas Cannasse
2004-04-27 16:58 ` Yamagata Yoriyuki
2004-04-27 23:35 ` Benjamin Geer
2004-04-28 3:44 ` John Goerzen
2004-04-28 13:01 ` Richard Jones
2004-04-28 21:30 ` Benjamin Geer
2004-04-28 21:44 ` John Goerzen
2004-04-28 22:41 ` Richard Jones
2004-04-29 11:51 ` Benjamin Geer
2004-04-29 12:03 ` Richard Jones
2004-04-29 15:16 ` Benjamin Geer
2004-04-29 10:27 ` Yamagata Yoriyuki
2004-04-29 13:03 ` John Goerzen
2004-04-29 13:40 ` Yamagata Yoriyuki
2004-04-29 14:02 ` John Goerzen
2004-04-29 15:31 ` Yamagata Yoriyuki
2004-04-29 17:31 ` james woodyatt
2004-04-29 23:53 ` Benjamin Geer
2004-04-30 4:10 ` james woodyatt
2004-04-29 11:23 ` Benjamin Geer
2004-04-29 12:23 ` Richard Jones
2004-04-29 15:10 ` Benjamin Geer
2004-04-29 15:35 ` John Goerzen
2004-04-29 15:46 ` Benjamin Geer
2004-04-29 15:58 ` Richard Jones
2004-04-29 20:41 ` John Goerzen
2004-04-29 22:35 ` Benjamin Geer
2004-05-01 14:37 ` Brian Hurt
2004-04-29 13:23 ` John Goerzen
2004-04-29 14:12 ` John Goerzen
2004-04-29 15:37 ` Benjamin Geer
2004-04-28 7:05 ` Nicolas Cannasse
2004-04-28 0:20 ` skaller
2004-04-28 3:39 ` John Goerzen
2004-04-28 13:04 ` Richard Jones
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20040503061212.GA64216@server.vns.oc.ca.us \
--to=vsilyaev@mindspring.com \
--cc=caml-list@inria.fr \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox