Mailing list for all users of the OCaml language and system.
 help / color / mirror / Atom feed
From: Peng Zang <peng.zang@gmail.com>
To: "Evan Klitzke" <evan@yelp.com>
Cc: caml-list@inria.fr
Subject: Re: [Caml-list] Help with simple ocaml memoization problem
Date: Thu, 29 Nov 2007 00:53:12 -0500	[thread overview]
Message-ID: <200711290053.15443.peng.zang@gmail.com> (raw)
In-Reply-To: <cfb58d850711281917j2fe6efb6uee50068285736ec2@mail.gmail.com>

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

I don't know how to increase the stack size off the top of my head, but in 
general you want to avoid recursion on the stack anyways.  An easy way is to 
simply make the function tail recursive so the compiler can optimized it into 
a loop for you.  Here's a pretty faithful replication of your python code.  
Note I use Int64 instead of BigInt as I'm running OCaml 3.09.3.

let ( ++ ) = Int64.add;;
let ( ** ) = Int64.mul;;
let ( %% ) = Int64.rem;;
let ( // ) = Int64.div;;
let l1 = Int64.of_int 1;;
let l2 = Int64.of_int 2;;
let l3 = Int64.of_int 3;;
let cache = Hashtbl.create 1000000;;

let collatz = function
  | x when x %% l2 = l1 -> l3 ** x ++ l1
  | x -> x // l2
;;
let rec find_collatz_len = function
  | x when x = l1 -> l1
  | x when Hashtbl.mem cache x -> Lazy.force (Hashtbl.find cache x)
  | x -> let ans = lazy (l1 ++ find_collatz_len (collatz x)) in
      Hashtbl.add cache x ans;
      Lazy.force ans
;;
let lengths = Array.init 999999
  (fun x -> let x = Int64.of_int (x+1) in (x, find_collatz_len x));;
Array.sort (fun (a,b) (c,d) -> compare d b) lengths;;
lengths.(0);;


The use of laziness here is just so I can put the recursive call in the tail 
position -- a quick hack because I'm too lazy to spend time making it into a 
loop myself.

There should really be some easy syntax to write BigInts or Int64s directly in 
the code.  Is there some camlp4 that does this somewhere?  And it would nice 
if Int64 operators were predefined in the module.

Peng


On Wednesday 28 November 2007 10:17:42 pm Evan Klitzke wrote:
> Hi everyone,
>
> I'm learning Ocaml and I'm trying to figure out why the code I've
> written is so slow (and not working). I'm trying to learn Ocaml by
> implementing the Project Euler problems in Ocaml, and I'm stuck on
> problem 14 (http://projecteuler.net/index.php?section=problems&id=14).
>
> Basically I'm doing the 3*n+1 problem and I need to memoize values
> that I've already computed if the function is going to finish in a
> reasonable amount of time. I wrote a really short prototype of what I
> want to do in Python, and that code is really short and runs quickly
> (11 seconds); if you're interested you can see the prototype at
> http://static.eklitzke.org/collatz.py
>
> You can see my Ocaml attempt at
> http://static.eklitzke.org/problem14.ml . As you can see, it's a _lot_
> more code, which alone leads me to think that I'm probably not doing
> this idiomatically. When I run it (compiled with ocamlopt, ocaml 3.10
> on Linux), the memory usage climbs and after it reaches about 128 MB
> (after about 30 seconds) I get the error:
>
> Fatal error: exception Stack_overflow
>
> I think I could use some help with this problem. Maybe I just need to
> increase the stack size (how do I do that?) -- the Python version gets
> to about the same size right before finishing, so this seems like a
> reasonable amount of memory to use -- but I'm concerned that maybe I'm
> doing something else wrong, because I'd expect the Ocaml version to
> run much more quickly given that it is compiled to native code.  I
> would appreciate any pointers for things that I'm doing wrong or
> awkwardly.
>
> Thanks in advance!
>
> I'm going to include the text of my ocaml version  inline below, but
> due to the long lines in the program it will likely end up mangled :-/
>
> module type INT = sig
>   type t = int
>   val compare : t -> t-> int
> end;;
>
> module Int : INT = struct
>   type t = int
>   let compare i j = if i < j then -1 else if i = j then 0 else 1
> end;;
>
> open Big_int
> open Map
>
> (* A hash table for ints *)
> module IntMap = Map.Make(Int);;
>
> (* This is where we're going to store memoized values for find_collatz_len
> *) let ann = ref IntMap.empty;;
>
> (* Find the length of the chain for the int n *)
> let find_collatz_len (n:int) =
>
>   (* This is the definition of the collatz map for big_ints *)
>   let collatz (n:big_int) =
>     let two_big_int = succ_big_int unit_big_int in
>     if eq_big_int (mod_big_int n two_big_int) zero_big_int
>     then
>       div_big_int n two_big_int
>     else
>       succ_big_int(mult_int_big_int 3 n)
>
>   (* Tries to find the value of key `k' in the map m, or 0 if not found *)
>   and find_safe m k = try IntMap.find k m with Not_found -> 0 in
>
>   (* Helper function that accepts big_ints; t is the term that we're
> finding the length of *)
>   let rec aux_collatz_len (t:big_int) =
>
>     (* We use memoization for terms small enough to fit in an int *)
>     if is_int_big_int t then
>       let t_int = int_of_big_int t in
>       let v = find_safe !ann t_int in
>       if v <> 0 then
>         v (* Cache hit -- we've already computed aux_collatz_len for
> this value of t *)
>       else
>         if eq_big_int t unit_big_int then 1
>         else
>           let v' = 1 + aux_collatz_len (collatz t) in
>           ann := IntMap.add t_int v' !ann ; v'
>     else
>       1 + aux_collatz_len (collatz t)
>   in
>   aux_collatz_len (big_int_of_int n);;
>
> (* Makes the list [1..n] *)
> let nats n =
>   let rec rnats m = if m = 0 then [] else m :: (rnats (m - 1)) in
> List.rev (rnats n) ;;
>
> let biggest_int l =
>   let rec aux l (num, len) =
>     if l = [] then
>       num
>     else
>       let (a, b) = List.hd l and t = List.tl l in if b > len then aux
> t (a, b) else aux t (num, len)
>   in
> aux l (List.hd l) ;;
>
> let solution = biggest_int (List.map (fun x -> (x, find_collatz_len
> x)) (nats 999999)) ;;
>
> print_endline (string_of_int solution)


-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.7 (GNU/Linux)

iD8DBQFHTlPLfIRcEFL/JewRAuxTAKCr/Pe/xCTW2csXY/PpvYLL1yELKQCgrC4U
ykJ05fhUQIF6JKlwI0F8/cA=
=lLIG
-----END PGP SIGNATURE-----


  reply	other threads:[~2007-11-29  5:53 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-11-29  3:17 Evan Klitzke
2007-11-29  5:53 ` Peng Zang [this message]
2007-11-29  6:12   ` [Caml-list] " Evan Klitzke
2007-11-29  8:16     ` David Allsopp
2007-11-29  8:11       ` Jon Harrop
2007-11-29  8:58         ` Jean-Christophe Filliâtre
2007-11-29 18:57           ` Jon Harrop
2007-11-29 22:25           ` Jon Harrop
2007-11-30 11:03             ` Jean-Christophe Filliâtre
2007-11-29  8:40       ` Luc Maranget
2007-11-29  8:47     ` Jean-Christophe Filliâtre
2007-12-04 23:49     ` Peng Zang
2007-11-29  8:08   ` Jon Harrop
2007-11-29 15:59     ` Peng Zang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=200711290053.15443.peng.zang@gmail.com \
    --to=peng.zang@gmail.com \
    --cc=caml-list@inria.fr \
    --cc=evan@yelp.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox