Mailing list for all users of the OCaml language and system.
 help / color / mirror / Atom feed
* Functorized stdlib ???
@ 1996-10-04 15:32 Utilisateur FNET
  1996-10-07  8:34 ` Jacques GARRIGUE
  0 siblings, 1 reply; 2+ messages in thread
From: Utilisateur FNET @ 1996-10-04 15:32 UTC (permalink / raw)



961004
(* ========================================================================= *)
Here is a suggestion about the standard ocaml library Hashtbl.

This module is very useful but it happens to uses the polymorphic
equality ( = ) : 'a -> 'a -> bool.

Unfortunately, there are cases where you want a hash-table with another
equality predicate, for example you might want physical equality ( == ).

This is precisely what functors are for !

Here is first a short example building a min function from an order
(the order is possibly polymorphic)

(* ========================================================================= *)
(* short_example.ml *)
(* ========================================================================= *)
module type ORDER =
sig
   type 'a t
   val ( <= ) : 'a t -> 'a t -> bool
end

module Min (Order : ORDER) =
struct
   let min x y = if (Order.( <= ) x y) then x else y
end

(* ========================================================================= *)
module OrderPoly =
struct
   type 'a t = 'a
   let ( <= ) = ( <= )
end

module MinPoly = Min (OrderPoly)

(* ========================================================================= *)
module OrderString =
struct
   type 'a t = string
   let ( <= ) x y = String.length x <= String.length y
end

module MinString = Min (OrderString)

(* ========================================================================= *)

let _ =
begin
   Printf.printf "minpoly   3.14 2.71 = %.2f\n" (MinPoly.min 3.14 2.71);
   Printf.printf "minpoly   b    az   = %s\n"   (MinPoly.min "b" "az");
   Printf.printf "minstring b    az   = %s\n"   (MinString.min "b" "az");
   ()
end

(* ========================================================================= *)

And here, applied to hash-tables to get polymorphic
(or, if needed, monomorphic) hash-tables :

(* ========================================================================= *)
(* File h.mli *)
(* ========================================================================= *)
module type EQUALITY =
  sig
    type 'a t
    val ( = ) : 'a t -> 'a t -> bool
    val hash_param : int -> int -> 'a t -> int
  end

module type H =
    sig
      type 'a equality
      type ('a, 'b) t
      val create : int -> ('a, 'b) t
      val clear : ('a, 'b) t -> unit
      val add : ('a equality, 'b) t -> 'a equality -> 'b -> unit
      val remove : ('a equality, 'b) t -> 'a equality -> unit
      val find : ('a equality, 'b) t -> 'a equality -> 'b
      val find_all : ('a equality, 'b) t -> 'a equality -> 'b list
      val iter : ('a equality -> 'b -> 'c) -> ('a equality, 'b) t -> unit
    end

module H (Equality : EQUALITY) : (H with type 'a equality = 'a Equality.t)

(* ========================================================================= *)
module HEqual : (H with type 'a equality = 'a)
module HEq : (H with type 'a equality = 'a)

(* ========================================================================= *)

(* ========================================================================= *)
(* File h.ml : contains stdlib/hashtbl.ml mostly unmodified *) 
(* ========================================================================= *)
module type EQUALITY =
sig
   type 'a t
   val ( = ) : 'a t -> 'a t -> bool
   val hash_param : int -> int -> 'a t -> int
end

module type H =
    sig
      type 'a equality
      type ('a, 'b) t
      val create : int -> ('a, 'b) t
      val clear : ('a, 'b) t -> unit
      val add : ('a equality, 'b) t -> 'a equality -> 'b -> unit
      val remove : ('a equality, 'b) t -> 'a equality -> unit
      val find : ('a equality, 'b) t -> 'a equality -> 'b
      val find_all : ('a equality, 'b) t -> 'a equality -> 'b list
      val iter : ('a equality -> 'b -> 'c) -> ('a equality, 'b) t -> unit
    end

(* ========================================================================= *)
module H (Equality : EQUALITY) =
struct
   let ( -- ) = ( = )
   and ( = ) = Equality.( = )
   and hash_param = Equality.hash_param
   type 'a equality = 'a Equality.t
(* ========================================================================= *)


(***********************************************************************)
(*                                                                     *)
(*                           Objective Caml                            *)
(*                                                                     *)
(*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         *)
(*                                                                     *)
(*  Copyright 1996 Institut National de Recherche en Informatique et   *)
(*  Automatique.  Distributed only by permission.                      *)
(*                                                                     *)
(***********************************************************************)

(* $Id: hashtbl.ml,v 1.5 1996/04/30 14:50:09 xleroy Exp $ *)

(* Hash tables *)

(* We do dynamic hashing, and we double the size of the table when
   buckets become too long, but without re-hashing the elements. *)

type ('a, 'b) t =
  { mutable max_len: int;                    (* max length of a bucket *)
    mutable data: ('a, 'b) bucketlist array } (* the buckets *)

and ('a, 'b) bucketlist =
    Empty
  | Cons of 'a * 'b * ('a, 'b) bucketlist

let create initial_size =
  { max_len = 2; data = Array.create initial_size Empty }

let clear h =
  for i = 0 to Array.length h.data - 1 do
    h.data.(i) <- Empty
  done

let resize h =
  let n = Array.length h.data in
  let newdata = Array.create (n+n) Empty in
    Array.blit h.data 0 newdata 0 n;
    Array.blit h.data 0 newdata n n;
    h.data <- newdata;
    h.max_len <- 2 * h.max_len

let rec bucket_too_long n bucket =
  if n < 0 then true else
    match bucket with
      Empty -> false
    | Cons(_,_,rest) -> bucket_too_long (pred n) rest

(* CHANGE : removed external declaration of hash_param *)

let add h key info =
  let i = (hash_param 10 100 key) mod (Array.length h.data) in
  let bucket = Cons(key, info, h.data.(i)) in
    h.data.(i) <- bucket;
    if bucket_too_long h.max_len bucket then resize h

let remove h key =
  let rec remove_bucket = function
      Empty ->
        Empty
    | Cons(k, i, next) ->
        if k = key then next else Cons(k, i, remove_bucket next) in
  let i = (hash_param 10 100 key) mod (Array.length h.data) in
    h.data.(i) <- remove_bucket h.data.(i)

let find h key =
  match h.data.((hash_param 10 100 key) mod (Array.length h.data)) with
    Empty -> raise Not_found
  | Cons(k1, d1, rest1) ->
      if key = k1 then d1 else
      match rest1 with
        Empty -> raise Not_found
      | Cons(k2, d2, rest2) ->
          if key = k2 then d2 else
          match rest2 with
            Empty -> raise Not_found
          | Cons(k3, d3, rest3) ->
              if key = k3 then d3 else begin
                let rec find = function
                    Empty ->
                      raise Not_found
                  | Cons(k, d, rest) ->
                      if key = k then d else find rest
                in find rest3
              end

let find_all h key =
  let rec find_in_bucket = function
    Empty ->
      []
  | Cons(k, d, rest) ->
      if k = key then d :: find_in_bucket rest else find_in_bucket rest in
  find_in_bucket h.data.((hash_param 10 100 key) mod (Array.length h.data))

let iter f h =
  let d = h.data in
  let len = Array.length d in
  for i = 0 to len - 1 do
    let rec do_bucket = function
        Empty ->
          ()
      | Cons(k, d, rest) ->
          if (hash_param 10 100 k) mod len -- i (* CHANGE -- instead of == *)
          then begin f k d; do_bucket rest end
          else do_bucket rest in
    do_bucket d.(i)
  done

let hash x = hash_param 50 500 x
(* ========================================================================= *)
(* original stdlib/hashtbl.ml ends here *)
(* ========================================================================= *)
end

(* ========================================================================= *)
module EqualityEqual =
struct
   type 'a t = 'a
   let ( = ) = ( = )
   let hash_param = Hashtbl.hash_param
end

module HEqual = H (EqualityEqual)

(* ========================================================================= *)
module EqualityEq =
struct
   type 'a t = 'a
   let ( = ) = ( == )

   (* just to try *)
   (*
   let hash_param _ _ x = 0
   *)
   (* or more realistic *)
   (**)
   external hash_eq : 'a -> int = "ML_hash_eq"
   let hash_param _ _ = hash_eq
   (**)
end

module HEq = H (EqualityEq)

(* ========================================================================= *)

/* ========================================================================= */
/* file h_c.c : found inspiration in byterun/hash.c */
/* ========================================================================= */
#include <mlvalues.h>

value ML_hash_eq (value v) { return Val_long (v & 0x3FFFFFFF); }
  /* The & has two purposes: ensure that the return value is positive
     and give the same result on 32 bit and 64 bit architectures. */
   
/* ========================================================================= */

(* ========================================================================= *)
(* file test_h.ml *)
(* ========================================================================= *)
module HEqual = H.HEqual
module Hashtbl = HEqual            (* backward compatibility is possible *)

let tequal = HEqual.create 127
let _ =
begin
   HEqual.add tequal 1 "one";
   ()
end
let dummy = HEqual.find tequal 1   (* "one" *)

(* ========================================================================= *)
module HEq = H.HEq

let a1 = "a"
and a2 = "a"                       (* a1 = a2 but a1 != a2 *)

let teq = HEq.create 127
let _ =
begin
   HEq.add teq a1 1;
   HEq.add teq a2 2;
   ()
end
let dummy = HEq.find teq a1        (* 1 *)
let dummy = HEq.find teq a2        (* 2 *)

(* ========================================================================= *)

It seems to me that functors and polymorphism might/should go together well.

At any rate I would like a way to conceive modules and functors that
do not forbid polymorphism because, if you look at the short example :

module type ORDER =
sig
   type 'a t
   val ( <= ) : 'a t -> 'a t -> bool
end

It is really conter-intuitive since I have to parameterize the type t
with 'a before any use of type t.

What I first wrote (which does not work in the end) was :

module type ORDER_first =
sig
   type t
   val ( <= ) : 'a t -> 'a t -> bool
end

this is what I meant but OrderPoly was not of module type ORDER_first,
that's why I finally had to choose type 'a t.

Unfortunately this is no real solution since a predicate over
couples, say (fun (x1,y1) (x2, y2) -> y1 <= y2) : 'a * 'b -> 'a * 'b -> bool
will not fit in module type ORDER whose type 'a t has only one parameter.

Well, well, if somebody has clues ...

By the way, this technique of using paramerized types can also be used
to modify the Set module of stdlib to get a polymorphic set type
(just change type t into type 'obj t and type elt into type 'obj elt !)

This does not seem much but it enables the user to share code (as I do not
know whether applying functors duplicates code or not) and most of all, to
use only one module (no need to create modules all over the code ...)

Please, feel free to be controversial :-)

(* ========================================================================= *)
Thierry Bravier
Dassault Aviation.
DGT / DTN / ELO / EAV
78, Quai Marcel Dassault
F-92214 Saint-Cloud Cedex
France

Telephone : (33) 01 47 11 53 07
Telecopie : (33) 01 47 11 52 83
E-Mail :    bravier@dassault-avion.fr

(* ========================================================================= *)






^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: Functorized stdlib ???
  1996-10-04 15:32 Functorized stdlib ??? Utilisateur FNET
@ 1996-10-07  8:34 ` Jacques GARRIGUE
  0 siblings, 0 replies; 2+ messages in thread
From: Jacques GARRIGUE @ 1996-10-07  8:34 UTC (permalink / raw)
  To: caml-list


>>>>>  <bravier@dassault-avion.fr> writes:

 > Here is a suggestion about the standard ocaml library Hashtbl.

 > This module is very useful but it happens to uses the polymorphic
 > equality ( = ) : 'a -> 'a -> bool.

 > Unfortunately, there are cases where you want a hash-table with another
 > equality predicate, for example you might want physical equality ( == ).

 > This is precisely what functors are for !

I have a doubt about the need for modifying specifically Hashtbl for
that.  In fact, there is already a Map module for that. The fact you
have to define a C function (to be efficient) limits the variants you
may have with your functor anyway.

But the general parameterizing problem you expose is interesting.

 > It seems to me that functors and polymorphism might/should go together well.

 > At any rate I would like a way to conceive modules and functors that
 > do not forbid polymorphism because, if you look at the short example :

 > module type ORDER =
 > sig
 >    type 'a t
 >    val ( <= ) : 'a t -> 'a t -> bool
 > end

 > It is really conter-intuitive since I have to parameterize the type t
 > with 'a before any use of type t.

 > What I first wrote (which does not work in the end) was :

 > module type ORDER_first =
 > sig
 >    type t
 >    val ( <= ) : 'a t -> 'a t -> bool
 > end

 > this is what I meant but OrderPoly was not of module type ORDER_first,
 > that's why I finally had to choose type 'a t.

 > Unfortunately this is no real solution since a predicate over
 > couples, say (fun (x1,y1) (x2, y2) -> y1 <= y2) : 'a * 'b -> 'a * 'b -> bool
 > will not fit in module type ORDER whose type 'a t has only one parameter.

 > Well, well, if somebody has clues ...

A (partial solution) would be to use classes, and in my opinion this
is more natural. You generally use only few hashtables/maps of the
same type in the same program, and having to define a module for each
type is a pain. With a class, you can either give the parameter when
creating an object, or define specialized classes by inheritance (as
you would do with a functor). The most interesting point is that you
don't need to prefix your method names by a strange module name: this
is taken from the object.

Here is the interface for map (imperative style : there is an internal
state and modifications return unit, but one could do it functional
style):

class ('a, 'b) map ('a -> 'a -> int) =
  method clear : unit
  method add : 'a -> 'b -> unit
  method find : 'a -> 'b
  method remove : 'a -> unit
  method iter : ('a -> 'b -> unit) -> unit
end

The parameter is the comparison function which you usually give to the 
functor.

Either use it directly

  # let m = new map compare;;
  m : ('_a,'_b) map = <object>

or create a specialized class, integer maps for instance

  # class 'a imap () =
      inherit (int,'a) map (-)
    end

There is still the problem that, at the moment, all polymorphic
variables in methods have to be bound at the class level.
Here this means that we cannot define fold:

  method fold : ('c -> 'a -> 'b -> 'c) -> 'c -> 'c

is not allowed : 'c should be bound at the class level, but then this
is meaningless. You can still simulate it with iter, so this is not
that bad.

Remark also that since functors bind type constructors rather then
types themselves, the map class cannot be defined using the functor,
and the source code has to be modified.

 > By the way, this technique of using paramerized types can also be used
 > to modify the Set module of stdlib to get a polymorphic set type
 > (just change type t into type 'obj t and type elt into type 'obj elt !)

Again I can define a set class, and avoid the problem with multiple
parameters. (I shall distribute this class library soon.)

 > This does not seem much but it enables the user to share code (as I do not
 > know whether applying functors duplicates code or not) and most of all, to
 > use only one module (no need to create modules all over the code ...)

Code is not duplicated, so the reason is mostly making program clearer 
by avoiding the multiplication of modules. We have the same concern.

 > Please, feel free to be controversial :-)

I think I've been :-)

In fact there is here a choice to be done between using
object-oriented features or not. In my opinion object-oriented style
is easier to understand, but modules and functors may be more
"functional". Not only Map and Set, but also Genlex, Hashtbl, Queue,
Stack and Stream can be turned OO with some profit.

In the current version Objective Caml allows both styles, but only
provides support for the first one: there are no predefined classes.
A concern is efficiency, but well implemented there is no reason that
it should be slower than Smalltalk, which is widely used.

On this point, it would be nice to have a clearer view of what
Objective Caml is going to be, if somebody knows.

	Jacques Garrigue





^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~1996-10-07 10:41 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
1996-10-04 15:32 Functorized stdlib ??? Utilisateur FNET
1996-10-07  8:34 ` Jacques GARRIGUE

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox