* We should all be forking
@ 2007-06-05 18:13 Jon Harrop
2007-06-05 18:25 ` [Caml-list] " Jonathan Bryant
` (4 more replies)
0 siblings, 5 replies; 10+ messages in thread
From: Jon Harrop @ 2007-06-05 18:13 UTC (permalink / raw)
To: Caml List
Following on from our "why not fork" discussion, here is my most elegant
concurrent ray tracer written in vanilla OCaml.
This program simply runs four processes (forking off three) in parallel to
improve performance when 2-4 CPUs are available (increase "d" if you have >4
CPUs).
This isn't as sophisticated as the JoCaml version but it does double
performance (taking the time down from 4s to 2s) on my dual-core machine,
making it faster than any language that uses a concurrent GC. I don't know
about you guys, but this is a complete revelation for me!
I believe the performance relies upon the Linux kernel lazily copying the
process. Does OSX also do that?
Anyway, here is the code:
let delta = sqrt epsilon_float
type vec = {x:float; y:float; z:float}
let ( *| ) s r = {x = s *. r.x; y = s *. r.y; z = s *. r.z}
let ( +| ) a b = {x = a.x +. b.x; y = a.y +. b.y; z = a.z +. b.z}
let ( -| ) a b = {x = a.x -. b.x; y = a.y -. b.y; z = a.z -. b.z}
let dot a b = a.x *. b.x +. a.y *. b.y +. a.z *. b.z
let length r = sqrt(dot r r)
let unitise r = 1. /. length r *| r
type scene =
Sphere of vec * float
| Group of vec * float * scene * scene * scene * scene * scene
let ray_sphere {x=dx; y=dy; z=dz} {x=vx; y=vy; z=vz} r =
let disc = vx *. vx +. vy *. vy +. vz *. vz -. r *. r in
if disc < 0. then infinity else
let b = vx *. dx +. vy *. dy +. vz *. dz in
let b2 = b *. b in
if b2 < disc then infinity else
let disc = sqrt(b2 -. disc) in
let t1 = b -. disc in
if t1 > 0. then t1 else b +. disc
let ray_sphere' {x=ox; y=oy; z=oz} {x=dx; y=dy; z=dz} {x=cx; y=cy; z=cz} r =
let vx = cx -. ox and vy = cy -. oy and vz = cz -. oz in
let vv = vx *. vx +. vy *. vy +. vz *. vz in
let b = vx *. dx +. vy *. dy +. vz *. dz in
let disc = b *. b -. vv +. r *. r in
disc >= 0. && b +. sqrt disc >= 0.
type hit = {l: float; nx: float; ny: float; nz: float}
let rec intersect ({x=dx; y=dy; z=dz} as dir) hit = function
Sphere ({x=cx; y=cy; z=cz} as center, radius) ->
let l' = ray_sphere dir center radius in
if l' >= hit.l then hit else
let x = l' *. dx -. cx in
let y = l' *. dy -. cy in
let z = l' *. dz -. cz in
let il = 1. /. sqrt(x *. x +. y *. y +. z *. z) in
{l = l'; nx = il *. x; ny = il *. y; nz = il *. z}
| Group (center, radius, a, b, c, d, e) ->
let l' = ray_sphere dir center radius in
if l' >= hit.l then hit else
let f h s = intersect dir h s in
f (f (f (f (f hit a) b) c) d) e
let rec intersect' orig dir = function
Sphere (center, radius) -> ray_sphere' orig dir center radius
| Group (center, radius, a, b, c, d, e) ->
let f s = intersect' orig dir s in
ray_sphere' orig dir center radius && (f a || f b || f c || f d || f e)
let neg_light = unitise { x = 1.; y = 3.; z = -2. }
let rec ray_trace dir scene =
let hit = intersect dir {l=infinity; nx=0.; ny=0.; nz=0.} scene in
if hit.l = infinity then 0. else
let n = {x = hit.nx; y = hit.ny; z = hit.nz} in
let g = dot n neg_light in
if g < 0. then 0. else
if intersect' (hit.l *| dir +| delta *| n) neg_light scene then 0. else
g
let fold5 f x a b c d e = f (f (f (f (f x a) b) c) d) e
let rec create level c r =
let obj = Sphere (c, r) in
if level = 1 then obj else
let a = 3. *. r /. sqrt 12. in
let rec bound (c, r) = function
Sphere (c', r') -> c, max r (length (c -| c') +. r')
| Group (_, _, v, w, x, y, z) -> fold5 bound (c, r) v w x y z in
let aux x' z' = create (level - 1) (c +| {x=x'; y=a; z=z'}) (0.5 *. r) in
let w = aux (-.a) (-.a) and x = aux a (-.a) in
let y = aux (-.a) a and z = aux a a in
let c, r = fold5 bound (c +| {x=0.; y=r; z=0.}, 0.) obj w x y z in
Group (c, r, obj, w, x, y, z)
let level, n =
try int_of_string Sys.argv.(1), int_of_string Sys.argv.(2) with _ -> 9, 512
let scene = create level { x = 0.; y = -1.; z = 4. } 1. and ss = 4
module String = struct
include String
let init n f =
if n=0 then "" else
let s = String.make n (f 0) in
for i=1 to n-1 do
s.[i] <- f i
done;
s
end
let raster y =
y, String.init n
(fun x ->
let g = ref 0. in
for dx = 0 to ss - 1 do
for dy = 0 to ss - 1 do
let aux x d = float x -. float n /. 2. +. float d /. float ss in
let dir = unitise {x = aux x dx; y = aux y dy; z = float n } in
g := !g +. ray_trace dir scene
done
done;
let g = 0.5 +. 255. *. !g /. float (ss*ss) in
char_of_int (int_of_float g))
let rasters d i =
Array.init ((n+d-i)/d) (fun j -> raster(d*j+i))
let invoke (f : 'a -> 'b) x : unit -> 'b =
let input, output = Unix.pipe() in
match Unix.fork() with
| 0 ->
Unix.close input;
let output = Unix.out_channel_of_descr output in
Marshal.to_channel output (try `Res(f x) with e -> `Exn e) [];
exit 0
| _ ->
Unix.close output;
let input = Unix.in_channel_of_descr input in
fun () ->
match Marshal.from_channel input with
| `Res x -> x
| `Exn e -> raise e
let ( |> ) x f = f x
let map (f : 'a -> 'b) a : 'b array =
let n = Array.length a in
let aux i x =
if i<n-1 then invoke f x else
let f_x = f x in
fun () -> f_x in
Array.mapi aux a |>
Array.map (fun f -> f())
let () =
let d = 4 in
let sort a = Array.sort compare a; a in
let image =
Array.init d (fun i -> i) |>
map (rasters d) |>
Array.to_list |>
Array.concat |>
sort |>
Array.map snd in
Printf.printf "P5\n%d %d\n255\n" n n;
for y = n - 1 downto 0 do
print_string image.(y)
done
--
Dr Jon D Harrop, Flying Frog Consultancy Ltd.
OCaml for Scientists
http://www.ffconsultancy.com/products/ocaml_for_scientists/?e
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [Caml-list] We should all be forking
2007-06-05 18:13 We should all be forking Jon Harrop
@ 2007-06-05 18:25 ` Jonathan Bryant
2007-06-05 18:32 ` Joel Reymont
` (3 subsequent siblings)
4 siblings, 0 replies; 10+ messages in thread
From: Jonathan Bryant @ 2007-06-05 18:25 UTC (permalink / raw)
To: Jon Harrop; +Cc: Caml List
>
> I believe the performance relies upon the Linux kernel lazily
> copying the
> process. Does OSX also do that?
The man page under OSX doesn't mention anything about copy-on-write.
That doesn't necessarily mean anything, though...
--Jonathan
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [Caml-list] We should all be forking
2007-06-05 18:13 We should all be forking Jon Harrop
2007-06-05 18:25 ` [Caml-list] " Jonathan Bryant
@ 2007-06-05 18:32 ` Joel Reymont
2007-06-05 18:46 ` Jon Harrop
2007-06-05 19:13 ` Chris King
` (2 subsequent siblings)
4 siblings, 1 reply; 10+ messages in thread
From: Joel Reymont @ 2007-06-05 18:32 UTC (permalink / raw)
To: Jon Harrop; +Cc: Caml List
On Jun 5, 2007, at 7:13 PM, Jon Harrop wrote:
> This isn't as sophisticated as the JoCaml version but it does double
> performance (taking the time down from 4s to 2s) on my dual-core
> machine,
I must have missed the JoCaml version. Where is it?
Thanks, Joel
--
http://topdog.cc - EasyLanguage to C# translator
http://wagerlabs.com - Blog
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [Caml-list] We should all be forking
2007-06-05 18:13 We should all be forking Jon Harrop
2007-06-05 18:25 ` [Caml-list] " Jonathan Bryant
2007-06-05 18:32 ` Joel Reymont
@ 2007-06-05 19:13 ` Chris King
2007-06-05 19:20 ` jocaml vs camlp3l Joel Reymont
2007-06-05 23:58 ` [Caml-list] We should all be forking Brian Hurt
2007-06-07 12:04 ` Richard Jones
4 siblings, 1 reply; 10+ messages in thread
From: Chris King @ 2007-06-05 19:13 UTC (permalink / raw)
To: Jon Harrop; +Cc: Caml List
On 6/5/07, Jon Harrop <jon@ffconsultancy.com> wrote:
> Following on from our "why not fork" discussion, here is my most elegant
> concurrent ray tracer written in vanilla OCaml.
>
> This program simply runs four processes (forking off three) in parallel to
> improve performance when 2-4 CPUs are available (increase "d" if you have >4
> CPUs).
I'm curious, have you considered trying camlp3l [1] to parallelize
your raytracer? It uses high-level constructs ("skeletons") to
describe parallelizable functions, e.g. for a raytracer you can
essentially just say "farm out each row to a different computing
node". It uses marshalling over TCP/IP sockets, so can be distributed
if you like. The forking you just do beforehand in a shell script
(though it gives an example where you do it in O'Caml too).
[1] http://camlp3l.inria.fr/
^ permalink raw reply [flat|nested] 10+ messages in thread
* jocaml vs camlp3l
2007-06-05 19:13 ` Chris King
@ 2007-06-05 19:20 ` Joel Reymont
2007-06-05 21:02 ` [Caml-list] " Erik de Castro Lopo
0 siblings, 1 reply; 10+ messages in thread
From: Joel Reymont @ 2007-06-05 19:20 UTC (permalink / raw)
To: Chris King; +Cc: Jon Harrop, Caml List
I'm gonna go bonkers soon. Suppose I know and quite like Erlang but
dislike its syntax and performance on non-IO things. I'm also quite
fond of OCaml.
Should I go with JoCaml? Camlp3l? Why?
Thanks, Joel
On Jun 5, 2007, at 8:13 PM, Chris King wrote:
> I'm curious, have you considered trying camlp3l [1] to parallelize
> your raytracer? It uses high-level constructs ("skeletons") to
> describe parallelizable functions, e.g. for a raytracer you can
> essentially just say "farm out each row to a different computing
> node".
--
http://topdog.cc - EasyLanguage to C# translator
http://wagerlabs.com - Blog
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [Caml-list] jocaml vs camlp3l
2007-06-05 19:20 ` jocaml vs camlp3l Joel Reymont
@ 2007-06-05 21:02 ` Erik de Castro Lopo
0 siblings, 0 replies; 10+ messages in thread
From: Erik de Castro Lopo @ 2007-06-05 21:02 UTC (permalink / raw)
To: caml-list
Joel Reymont wrote:
> I'm gonna go bonkers soon. Suppose I know and quite like Erlang but
> dislike its syntax and performance on non-IO things. I'm also quite
> fond of OCaml.
I'm very keen on Ocaml and started learning Erlang for the concurrency.
However, Erlang's syntax, dynamic typing and "everything runs in the
erl shell" thing started to annoy me.
So JoCaml is currently looking *really* good.
> Should I go with JoCaml? Camlp3l? Why?
I think we let Jon Harrop and others (maybe even me) thrash this out
on a number of examples and then enjoy the benefits:-).
Erik
--
-----------------------------------------------------------------
Erik de Castro Lopo
-----------------------------------------------------------------
"I do have a policy of mandatory evisceration where the crime of
whitespace in filenames is perpetrated on my systems. This has
inspired a new range of designer colostomy bag covers in corduroy,
velvet, and the ever popular denim."
-- jdub on the SLUG mailing list
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [Caml-list] We should all be forking
2007-06-05 18:13 We should all be forking Jon Harrop
` (2 preceding siblings ...)
2007-06-05 19:13 ` Chris King
@ 2007-06-05 23:58 ` Brian Hurt
2007-06-07 12:04 ` Richard Jones
4 siblings, 0 replies; 10+ messages in thread
From: Brian Hurt @ 2007-06-05 23:58 UTC (permalink / raw)
To: Jon Harrop; +Cc: Caml List
On Tue, 5 Jun 2007, Jon Harrop wrote:
>
> Following on from our "why not fork" discussion, here is my most elegant
> concurrent ray tracer written in vanilla OCaml.
>
> This program simply runs four processes (forking off three) in parallel to
> improve performance when 2-4 CPUs are available (increase "d" if you have >4
> CPUs).
>
> This isn't as sophisticated as the JoCaml version but it does double
> performance (taking the time down from 4s to 2s) on my dual-core machine,
> making it faster than any language that uses a concurrent GC. I don't know
> about you guys, but this is a complete revelation for me!
This actually doesn't surprise me. Big linear algebra problems- like ray
tracing- are almost embarassingly parallel. This is why the supercomputer
market died a decade and a half ago- it was just too easy to take the
problems supercomputers were designed to handle and spread them out over
an cluster of workstations or pcs. What needs to be shared in the ray
tracing example? The model, obviously. And the results have to be
collected. And what subset of the problem (which collection of rays) need
to be worked on. I note that the big-iron database machine (aka
mainframe) market is alive and well. Some problems don't parallelize
quite so easily.
My point here is to caution against making too much soup from this lone
oyster. Simply because this program, and a large class of other programs
like it, parallelize easily, doesn't mean all will.
On the other hand, for programs that do parallelize this way, JoCaml looks
like a wonderful gift.
Brian
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [Caml-list] We should all be forking
2007-06-05 18:13 We should all be forking Jon Harrop
` (3 preceding siblings ...)
2007-06-05 23:58 ` [Caml-list] We should all be forking Brian Hurt
@ 2007-06-07 12:04 ` Richard Jones
2007-06-14 10:09 ` Luc Maranget
4 siblings, 1 reply; 10+ messages in thread
From: Richard Jones @ 2007-06-07 12:04 UTC (permalink / raw)
To: Jon Harrop; +Cc: Caml List
How close are we to real-time raytracing then? Given that 4 & even 8
core machines may become commonplace soon.
Rich.
--
Richard Jones
Red Hat
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [Caml-list] We should all be forking
2007-06-07 12:04 ` Richard Jones
@ 2007-06-14 10:09 ` Luc Maranget
0 siblings, 0 replies; 10+ messages in thread
From: Luc Maranget @ 2007-06-14 10:09 UTC (permalink / raw)
To: Richard Jones; +Cc: Caml List
>
> How close are we to real-time raytracing then? Given that 4 & even 8
> core machines may become commonplace soon.
>
> Rich.
>
> --
> Richard Jones
> Red Hat
For your information there is now a more realistic ray tracer in JoCaml.
<http://jocaml.inria.fr/#examples>
Speedups can be impressive, but no real-time yet, even on
a 2x11x2Ghz grid.
The JoCaml ray tracer is a distributed/concurrent version of
the Camls'R Us entry at ICFP 2000 programming contest.
--
Luc Maranget
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2007-06-14 10:09 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-06-05 18:13 We should all be forking Jon Harrop
2007-06-05 18:25 ` [Caml-list] " Jonathan Bryant
2007-06-05 18:32 ` Joel Reymont
2007-06-05 18:46 ` Jon Harrop
2007-06-05 19:13 ` Chris King
2007-06-05 19:20 ` jocaml vs camlp3l Joel Reymont
2007-06-05 21:02 ` [Caml-list] " Erik de Castro Lopo
2007-06-05 23:58 ` [Caml-list] We should all be forking Brian Hurt
2007-06-07 12:04 ` Richard Jones
2007-06-14 10:09 ` Luc Maranget
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox