From: sejourne_kevin <sejourne_kevin@yahoo.fr>
To: padiolea@irisa.fr
Cc: Erik de Castro Lopo <ocaml-erikd@mega-nerd.com>,
caml-list@yquem.inria.fr
Subject: Re: [Caml-list] Hashtbl.create 401
Date: Sat, 23 Apr 2005 00:26:57 +0000 [thread overview]
Message-ID: <42699651.6020200@yahoo.fr> (raw)
In-Reply-To: <40833.131.254.50.45.1114171604.squirrel@mail.irisa.fr>
padiolea@irisa.fr a écrit :
>>On Thu, 21 Apr 2005 21:09:18 -0600
>>Matt Gushee <mgushee@havenrock.com> wrote:
>>
>>
>>>Hello, all--
>>>
>>>While browsing through the Labltk source code (and perhaps in some other
>>>places), I noticed several instances of:
>>>
>>> Hashtbl.create 401
>>>
>>>Why 401?
>>
>>Hash tables distribute data across a set of hash buckets. If
>>the number of hash buckets is prime (401 is prime) then their
>>is a better chance of the data being evenly distributed across
>>the buckets.
>
>
> but it seems that this number is not that much important since,
> as said in the ml source:
> (* We do dynamic hashing, and resize the table and rehash the elements
> when buckets become too long. *)
If you want to have a HashTbl with only a prime number of hash buckets,
you can do things like that (with code stolen from the stdlib):
let table = (* list of (next_primes 2n+1) of root 3 *)
[| 3; 11; 29; 61; 127; 257; 521; 1049; 2111; 4229; 8461; 16927; 33857;
67723; 135449; 270913; 541831; 1083689; 2167393;(*max 32 bits*) 4334791;
8669593; 17339197; 34678421; 69356857; 138713717; 277427441; 554854889
|]
;;
type ('a, 'b) t =
{
mutable size: int; (* number of elements *)
mutable index: int; (* index of the next array
size*)
mutable data: ('a, 'b) bucketlist array (* the buckets *)
}
and ...
let create initial_size =
let i_m, _ =
fold_lefti (fun i ((_,v_m) as u) x ->
let v = abs (x - initial_size) in
if v < v_m then (i,v) else u
) (0,max_int) table
in
let s = min (max 1 table.(i_m)) Sys.max_array_length
in { size = 0; index = i_m; data = Array.make s Empty }
let resize hashfun tbl =
let _ = tbl.index <- succ tbl.index in
let osize = Array.length tbl.data in
let nsize = try min (table.(tbl.index)) Sys.max_array_length with |_->
osize
in
if nsize <> osize then
begin
let ndata = Array.create nsize Empty in
let rec insert_bucket = function
Empty -> ()
| Cons(key, data, rest) ->
insert_bucket rest; (* preserve original order of elements *)
let nidx = (hashfun key) mod nsize in
ndata.(nidx) <- Cons(key, data, ndata.(nidx)) in
for i = 0 to osize - 1 do
insert_bucket tbl.data.(i)
done;
tbl.data <- ndata
end
let copy h =
{ size = h.size;
index = h.index;
data = Array.copy h.data }
....
next prev parent reply other threads:[~2005-04-22 22:16 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-04-22 3:09 Matt Gushee
2005-04-22 3:21 ` [Caml-list] " Erik de Castro Lopo
2005-04-22 12:06 ` padiolea
2005-04-23 0:26 ` sejourne_kevin [this message]
2005-04-22 3:33 ` Radu Grigore
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=42699651.6020200@yahoo.fr \
--to=sejourne_kevin@yahoo.fr \
--cc=caml-list@yquem.inria.fr \
--cc=ocaml-erikd@mega-nerd.com \
--cc=padiolea@irisa.fr \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox