From: Jorgen Hermanrud Fjeld <jhf@hex.no>
To: caml-list@yquem.inria.fr
Subject: Haskell parser combinators in OCaml?
Date: Fri, 20 Oct 2006 00:01:31 +0200 [thread overview]
Message-ID: <20061019220131.GA18656@hex.no> (raw)
[-- Attachment #1: Type: text/plain, Size: 13931 bytes --]
Hi.
From the world of Haskell, the work of S. Doaitse Swierstra in the paper
"Combinator Parsers: From Toys to Tools"
"http://citeseer.ist.psu.edu/363886.html", introduces some very nice
combinator parsers that parse LALR(k) grammars, and give good error
messages.
I would love too express something equivalent in OCaml, but I'm not sure
how to translate the concepts used into concepts in OCaml.
I am hoping some of the type theorists out there would glance at the
paper, and bestow some reflection, advice or warning upon me.
There are several issues:
1) How to express the lazy lookahead data structure?
3) How to express the type of the parser in OCaml?
Some details:
1) The lazy data structure in 4.1 can not be expressed directly,
and I believe some kind of explicit fixed point is needed.
Would one need fixed points with deBruijn indexes?
Do you know of any similar examples that I may look at for
inspiration?
2) The parser has the haskell type
type Parser a =
forall b result .
Future b result
-> Stack a b
-> Errs
-> Input
-> Steps result
which I can not express in OCaml. My attempts at encoding this
using an encoding that express existential types, have so far not
worked out. I always end up with a type error, and do not see how
to better design it.
######## The type error
File "parser.ml", line 154, characters 21-26:
This field value has type
('a, 'a) future ->
(symbol, 'a) stack -> (errors -> errors) -> input -> ('a * errors) steps
which is less general than
'b 'c.
('b, 'c) future ->
('d, 'b) stack -> (errors -> errors) -> input -> ('c * errors) steps
######## Begin code
module BraunTree =
struct
type ('key,'value) braun_tree =
| Node of ('key,'value) braun_tree * ('key * 'value) * ('key,'value) braun_tree
| Nil
;;
let tree_of_list (l:('key*'value) list) : ('key,'value) braun_tree =
let rec tree_of_list len l =
match l with
| [] -> (Nil,[])
| (h::[]) -> (Node (Nil,h,Nil),[])
| (h::t) ->
let left_len = (len - 1) / 2 in
let right_len = len - 1 - left_len in
let (left_tree,left_list) = tree_of_list left_len l in
match left_list with
| [] -> assert false
| (left_head::left_tail) ->
let (right_tree,right_tail) = tree_of_list right_len left_tail in
(Node (left_tree,left_head,right_tree),right_tail)
in
let (tree,l) = tree_of_list (List.length l) l in
match l with
| [] -> tree
| _ -> assert false
;;
let find ~(key:'key) ~(tree:('key,'value) braun_tree) : 'value option =
let rec find tree =
match tree with
| Nil -> None
| Node (left,(found_key,value),right) ->
match compare key found_key with
| 0 -> Some value
| 1 -> find left
| -1 -> find right
| _ -> assert false
in
find tree
;;
end
module ContinuationTrieParser =
struct
type symbol = string
type input = symbol list
type 'result steps =
Ok of 'result steps
| Fail of 'result steps
| Stop of 'result
type ('a,'b) stack = 'a -> 'b;;
type ('cont,'result) future = 'cont -> (errors->errors) -> input -> 'result steps
and errors =
| Deleted of symbol * string * errors
| Inserted of symbol * string * errors
| Notused of string
type 'p automaton =
| Shift of 'p * (symbol * 'p automaton) list
| ShiftReduce of 'p automaton * 'p automaton
| Reduce of 'p
| Found of 'p * 'p automaton
type 'a combinator_parser = {
parse:'cont 'result.
('cont,'result) future-> ('a,'cont) stack -> (errors->errors) -> input -> ('result*errors) steps
}
type 'a parser_generator = {
automaton : ('a combinator_parser) automaton;
generated : 'a combinator_parser
}
exception Ambigous_grammar
;;
let rec best : 'result steps -> 'result steps -> 'result steps =
fun left right -> match (left,right) with
(Ok left,Ok right) -> Ok (best left right)
| (Fail left,Fail right) -> Fail (best left right)
| (Ok _,Fail _) -> left
| (Fail _,Ok _) -> right
| (Stop _,_) -> left
| (_,Stop _) -> right
;;
let best_parser (left:'a combinator_parser) (right:'a combinator_parser) : 'a combinator_parser =
let parse cont stack errors input =
best (left.parse cont stack errors input) (right.parse cont stack errors input)
in
{parse=parse}
;;
(** Also known as a catamorphism *)
let transform_automaton ((transform_shift
,transform_shiftreduce
,transform_reduce
,transform_found
): ((('p * (symbol*'p automaton) list) -> 'b)
*(('p automaton * 'p automaton) -> 'b)
*('p -> 'b)
*(('p * 'p automaton) -> 'b)))
(automaton:'a automaton) : 'b =
let rec transform (automaton:'a automaton) =
match automaton with
| Shift (p,choices) ->
let rec foreach choices collected =
match choices with
| [] -> collected
| ((symbol,choice)::tail) ->
let collected = (symbol,transform choice)::collected in
foreach tail collected
in
transform_shift (p,foreach choices [])
| ShiftReduce (shift,reduce) ->
transform_shiftreduce (transform shift,transform reduce)
| Reduce reduce -> transform_reduce reduce
| Found (found,more) -> transform_found (found,transform more)
in
transform automaton
;;
let map_automaton (f:'a->'b) (automaton: 'a automaton) : 'b automaton =
let transform_shift (p,choices) = Shift (f p,choices)
in
let transform_shiftreduce (shift,reduce) = ShiftReduce (shift,reduce)
in
let transform_reduce reduce = Reduce (f reduce)
in
let transform_found (found,more) = Found (f found,more)
in
transform_automaton
(transform_shift,transform_shiftreduce,transform_reduce,transform_found)
automaton
;;
let rec mkparser (automaton: string automaton) : string parser_generator =
let choose (input:input) : string combinator_parser =
let transform_shift ((p,choices)
:symbol * (symbol*symbol automaton) list) : string combinator_parser =
let table : (symbol,symbol automaton) BraunTree.braun_tree = BraunTree.tree_of_list choices in
let find key = BraunTree.find ~key ~tree:table in
let parse cont stack errors input : ('a*errors) steps =
match input with
| [] ->
let error =
errors (Inserted (p,"Insert at end of file",Notused ""))
in Stop (stack p,error)
| (h::t) -> begin
match find h with
| Some automaton ->
Ok ((mkparser automaton).generated.parse cont stack errors t)
| None ->
let errors error = errors (Deleted (h,"Deleted symbol",error))
in
let errors error = errors (Inserted (p,"Insert symbol",error))
in Fail (Fail (Stop (stack p,errors (Notused h))))
end
in
{parse=parse}
in
let transform_shiftreduce ((shift,reduce) : symbol automaton * symbol automaton ) =
let parse cont stack errors input =
(best_parser (mkparser shift).generated (mkparser reduce).generated).parse cont stack errors input
in
parse
in
let transform_reduce (reduce:symbol) =
let parse cont stack errors input = reduce cont stack errors input
in parse
in
let transform_found ((found,more):symbol*symbol automaton) =
let parse cont stack errors input = found cont stack errors input
in {parse=parse}
in
transform_automaton
(transform_shift,transform_shiftreduce,transform_reduce,transform_found)
automaton
in
let parse cont stack errors input =
(choose input).parse cont stack errors input
in
{automaton=automaton;parse=parse}
;;
(** <|> *)
let either : ('a parser_generator * 'a parser_generator) -> 'a parser_generator =
fun (p,q) ->
mkparser (merge_ch p.automaton q.automaton)
;;
let rec combine (lefts: (symbol * 'p automaton) list )
(rights: (symbol * 'p automaton) list )
: (symbol * 'p automaton) list =
match (lefts,rights) with
| ((((left_symbol,left_sentence) as left_head)::left_tail)
,(((right_symbol,right_sentence) as right_head)::right_tail)
) ->
begin
match compare left_symbol right_symbol with
| 1 -> left_head::(combine left_tail rights)
| -1 -> right_head::(combine lefts right_tail)
| 0 ->
let head = (left_symbol,either(left_sentence,right_sentence)) in
let tail = (combine left_tail right_tail) in
head::tail
| _ -> assert false
end
| ([],_) -> rights
| (_,[]) -> lefts
;;
(** <*> *)
let rec both : ('a parser_generator * 'a parser_generator) -> 'a parser_generator =
fun (p,q) ->
(** Use two combinator parsers in sequence
* a both for combinator parsers
*)
let both_combinator_parsers first second =
let parse cont stack errors input =
let stack f x = stack (f x) in
first.parse (second.parse cont) stack errors input
in
{parse=parse}
in
let transform_shift (p,choices) = Shift (both_combinator_parsers p q.generated,choices)
in
let transform_shiftreduce (shift,reduce) = merge_ch shift reduce
in
let transform_reduce reduce =
let worker x = fwby reduce x in
map_automaton worker q.automaton
in
let transform_found (found,more) = Found (both_combinator_parsers found q.generated,more)
in
let automaton = transform_automaton (transform_shift
,transform_shiftreduce
,transform_reduce
,transform_found) p.automaton
in
mkparser automaton
;;
let merge_ch left right =
match (left,right) with
| (Shift (left_parser,left_choices),Shift (right_parser,right_choices)) ->
let best = best_parser left_parser right_parser in
let choices = combine left_choices right_choices in
Shift (best,choices)
| (Shift _,ShiftReduce (shift,reduce)) ->
ShiftReduce (merge_ch left shift,reduce)
| (Shift _,Reduce _) -> ShiftReduce (left,right)
| (Shift _,Found (_,more)) ->merge_ch left more
| (Found (_,more),_) -> merge_ch more right
| (_,Shift _) -> merge_ch right left
| (ShiftReduce _,_)
| (Reduce _,_) -> raise Ambigous_grammar
;;
let symbol (a:symbol) : symbol combinator_parser =
let rec parse cont
stack
errors
input =
match input with
| x::xs ->
if a = x
then Ok (cont (stack a) errors xs)
else
let deleted_x =
let errors e = errors (Deleted (x,position xs,e)) in
parse cont stack errors xs in
let inserted_a =
let errors e = errors (Inserted (a,show_symbol a,e)) in
cont (stack x) errors input in
Fail (best deleted_x inserted_a)
| [] ->
let errors e = errors (Inserted (a,eof,e)) in
let inserted_a = cont (stack a) errors input in
Fail inserted_a
in
let accept cont stack errors input =
match input with
| [] -> assert false
| (x::xs) ->
assert (a = x) ;
Ok (cont (stack a) errors xs)
in
let shift = Shift ({parse=parse},[(a,Reduce {parse=accept})]) in
let found = Found ({parse=parse},shift) in
mkparser found
;;
let succeed f =
let parse cont stack errors input = cont (stack f) errors input in
mkparser (End {parse=parse})
;;
end
;;
######## End code
--
Sincerely | Homepage:
Jørgen | http://www.hex.no/jhf
| Public GPG key:
| http://www.hex.no/jhf/key.txt
[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 189 bytes --]
next reply other threads:[~2006-10-19 22:01 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-10-19 22:01 Jorgen Hermanrud Fjeld [this message]
2006-10-20 2:43 ` [Caml-list] " Jacques Garrigue
2006-10-20 15:19 ` Tom
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20061019220131.GA18656@hex.no \
--to=jhf@hex.no \
--cc=caml-list@yquem.inria.fr \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox