From: Jon Harrop <jon@ffconsultancy.com>
To: caml-list@yquem.inria.fr
Subject: LLVM: A native-code compiler for MiniML in ~100LOC
Date: Mon, 26 Nov 2007 20:52:18 +0000 [thread overview]
Message-ID: <200711262052.18186.jon@ffconsultancy.com> (raw)
I recently rediscovered the Low-Level Virtual Machine (LLVM) project that has
since been adopted by Apple:
http://llvm.org
This is a library (with OCaml bindings!) that allows you to write a compiler
that generates their RISC-like intermediate language (IL) that can then be
compiled to native code. LLVM even supports JIT compilation.
I went through the usual steps in trying this and was extremely impressed with
the results. After only two days I was able to create an optimizing
native-code compiler for a subset of CAML large enough to represent the
following Fibonacci program:
let rec fib n =
if n <= 2 then 1 else
fib(n-1) + fib(n-2)
do fib 40
The compiler is written entirely in OCaml, using camlp4 for lexing and
parsing, and the whole thing is only ~100 lines of code!
I'll detail exactly how you can use LLVM from OCaml in a future OCaml Journal
article:
http://www.ffconsultancy.com/products/ocaml_journal/?ol
Meanwhile here's my latest source:
type expr =
| Int of int
| Var of string
| BinOp of [ `Add | `Sub | `Leq ] * expr * expr
| If of expr * expr * expr
| Apply of expr * expr
type defn =
| LetRec of string * string * expr
open Camlp4.PreCast
let expr = Gram.Entry.mk "expr"
let defn = Gram.Entry.mk "defn"
let prog = Gram.Entry.mk "prog"
EXTEND Gram
expr:
[ [ "if"; p = expr; "then"; t = expr; "else"; f = expr ->
If(p, t, f) ]
| [ e1 = expr; "<="; e2 = expr -> BinOp(`Leq, e1, e2) ]
| [ e1 = expr; "+"; e2 = expr -> BinOp(`Add, e1, e2)
| e1 = expr; "-"; e2 = expr -> BinOp(`Sub, e1, e2) ]
| [ f = expr; x = expr -> Apply(f, x) ]
| [ v = LIDENT -> Var v
| n = INT -> Int(int_of_string n)
| "("; e = expr; ")" -> e ] ];
defn:
[ [ "let"; "rec"; f = LIDENT; x = LIDENT; "="; body = expr ->
LetRec(f, x, body) ] ];
prog:
[ [ defns = LIST0 defn; "do"; run = expr -> defns, run ] ];
END
open Printf
let program, run =
try Gram.parse prog Loc.ghost (Stream.of_channel stdin) with
| Loc.Exc_located(loc, e) ->
printf "%s at line %d\n" (Printexc.to_string e) (Loc.start_line loc);
exit 1
open Llvm
let ty = i64_type
let ( |> ) x f = f x
type state =
{ fn: llvalue;
blk: llbasicblock;
vars: (string * llvalue) list }
let bb state = builder_at_end state.blk
let new_block state name = append_block name state.fn
let find state v =
try List.assoc v state.vars with Not_found ->
eprintf "Unknown variable %s\n" v;
raise Not_found
let cont (v, state) dest_blk =
build_br dest_blk (bb state) |> ignore;
v, state
let rec expr state = function
| Int n -> const_int ty n, state
| Var x -> find state x, state
| BinOp(op, f, g) ->
let f, state = expr state f in
let g, state = expr state g in
let build, name = match op with
| `Add -> build_add, "add"
| `Sub -> build_sub, "sub"
| `Leq -> build_icmp Icmp_sle, "leq" in
build f g name (bb state), state
| If(p, t, f) ->
let t_blk = new_block state "pass" in
let f_blk = new_block state "fail" in
let k_blk = new_block state "cont" in
let cond, state = expr state p in
build_cond_br cond t_blk f_blk (bb state) |> ignore;
let t, state = cont (expr { state with blk = t_blk } t) k_blk in
let f, state = cont (expr { state with blk = f_blk } f) k_blk in
build_phi [t, t_blk; f, f_blk] "join" (bb state), state
| Apply(f, arg) ->
let f, state = expr state f in
let arg, state = expr state arg in
build_call f [|arg|] "apply" (bb state), state
let defn m vars = function
| LetRec(f, arg, body) ->
let ty = function_type ty [| ty |] in
let fn = define_function f ty m in
let vars' = (arg, param fn 0) :: (f, fn) :: vars in
let body, state =
expr { fn = fn; blk = entry_block fn; vars = vars' } body in
build_ret body (bb state) |> ignore;
(f, fn) :: vars
let int n = const_int ty n
let main filename =
let m = create_module filename in
let string = pointer_type i8_type in
let print =
declare_function "printf" (var_arg_function_type ty [|string|]) m in
let main = define_function "main" (function_type ty [| |]) m in
let blk = entry_block main in
let bb = builder_at_end blk in
let str s = define_global "buf" (const_stringz s) m in
let int_spec = build_gep (str "%d\n") [| int 0; int 0 |] "int_spec" bb in
let vars = List.fold_left (defn m) [] program in
let n, _ = expr { fn = main; blk = blk; vars = vars } run in
build_call print [| int_spec; n |] "" bb |> ignore;
build_ret (int 0) bb |> ignore;
if not (Llvm_bitwriter.write_bitcode_file m filename) then exit 1;
dispose_module m
let () = match Sys.argv with
| [|_; filename|] -> main filename
| _ as a -> Printf.eprintf "Usage: %s <file>\n" a.(0)
To use it, simply download and install the latest SVN version of LLVM (which
even builds and installs the OCaml bindings for you!) and then do:
$ ocamlc -g -dtypes -pp camlp4oof -I +camlp4 dynlink.cma camlp4lib.cma -cc g++
llvm.cma llvm_bitwriter.cma minml.ml -o minml
$ ./minml run.bc <fib.ml
$ llc -f run.bc -o run.s
$ gcc run.s -o run
$ ./run
102334155
$
You can look at the generated intermediate representation with:
$ llvm-dis -f run.bc
$ cat run.ll
If anyone improves upon this I'd love to hear about it! :-)
--
Dr Jon D Harrop, Flying Frog Consultancy Ltd.
http://www.ffconsultancy.com/products/?e
next reply other threads:[~2007-11-26 21:01 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-11-26 20:52 Jon Harrop [this message]
2007-11-27 16:57 ` [Caml-list] " Tom Primožič
2007-12-02 10:00 ` Xavier Leroy
2007-12-02 16:21 ` Jon Harrop
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=200711262052.18186.jon@ffconsultancy.com \
--to=jon@ffconsultancy.com \
--cc=caml-list@yquem.inria.fr \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox