* Haskell parser combinators in OCaml?
@ 2006-10-19 22:01 Jorgen Hermanrud Fjeld
2006-10-20 2:43 ` [Caml-list] " Jacques Garrigue
2006-10-20 15:19 ` Tom
0 siblings, 2 replies; 3+ messages in thread
From: Jorgen Hermanrud Fjeld @ 2006-10-19 22:01 UTC (permalink / raw)
To: caml-list
[-- Attachment #1: Type: text/plain, Size: 13931 bytes --]
Hi.
From the world of Haskell, the work of S. Doaitse Swierstra in the paper
"Combinator Parsers: From Toys to Tools"
"http://citeseer.ist.psu.edu/363886.html", introduces some very nice
combinator parsers that parse LALR(k) grammars, and give good error
messages.
I would love too express something equivalent in OCaml, but I'm not sure
how to translate the concepts used into concepts in OCaml.
I am hoping some of the type theorists out there would glance at the
paper, and bestow some reflection, advice or warning upon me.
There are several issues:
1) How to express the lazy lookahead data structure?
3) How to express the type of the parser in OCaml?
Some details:
1) The lazy data structure in 4.1 can not be expressed directly,
and I believe some kind of explicit fixed point is needed.
Would one need fixed points with deBruijn indexes?
Do you know of any similar examples that I may look at for
inspiration?
2) The parser has the haskell type
type Parser a =
forall b result .
Future b result
-> Stack a b
-> Errs
-> Input
-> Steps result
which I can not express in OCaml. My attempts at encoding this
using an encoding that express existential types, have so far not
worked out. I always end up with a type error, and do not see how
to better design it.
######## The type error
File "parser.ml", line 154, characters 21-26:
This field value has type
('a, 'a) future ->
(symbol, 'a) stack -> (errors -> errors) -> input -> ('a * errors) steps
which is less general than
'b 'c.
('b, 'c) future ->
('d, 'b) stack -> (errors -> errors) -> input -> ('c * errors) steps
######## Begin code
module BraunTree =
struct
type ('key,'value) braun_tree =
| Node of ('key,'value) braun_tree * ('key * 'value) * ('key,'value) braun_tree
| Nil
;;
let tree_of_list (l:('key*'value) list) : ('key,'value) braun_tree =
let rec tree_of_list len l =
match l with
| [] -> (Nil,[])
| (h::[]) -> (Node (Nil,h,Nil),[])
| (h::t) ->
let left_len = (len - 1) / 2 in
let right_len = len - 1 - left_len in
let (left_tree,left_list) = tree_of_list left_len l in
match left_list with
| [] -> assert false
| (left_head::left_tail) ->
let (right_tree,right_tail) = tree_of_list right_len left_tail in
(Node (left_tree,left_head,right_tree),right_tail)
in
let (tree,l) = tree_of_list (List.length l) l in
match l with
| [] -> tree
| _ -> assert false
;;
let find ~(key:'key) ~(tree:('key,'value) braun_tree) : 'value option =
let rec find tree =
match tree with
| Nil -> None
| Node (left,(found_key,value),right) ->
match compare key found_key with
| 0 -> Some value
| 1 -> find left
| -1 -> find right
| _ -> assert false
in
find tree
;;
end
module ContinuationTrieParser =
struct
type symbol = string
type input = symbol list
type 'result steps =
Ok of 'result steps
| Fail of 'result steps
| Stop of 'result
type ('a,'b) stack = 'a -> 'b;;
type ('cont,'result) future = 'cont -> (errors->errors) -> input -> 'result steps
and errors =
| Deleted of symbol * string * errors
| Inserted of symbol * string * errors
| Notused of string
type 'p automaton =
| Shift of 'p * (symbol * 'p automaton) list
| ShiftReduce of 'p automaton * 'p automaton
| Reduce of 'p
| Found of 'p * 'p automaton
type 'a combinator_parser = {
parse:'cont 'result.
('cont,'result) future-> ('a,'cont) stack -> (errors->errors) -> input -> ('result*errors) steps
}
type 'a parser_generator = {
automaton : ('a combinator_parser) automaton;
generated : 'a combinator_parser
}
exception Ambigous_grammar
;;
let rec best : 'result steps -> 'result steps -> 'result steps =
fun left right -> match (left,right) with
(Ok left,Ok right) -> Ok (best left right)
| (Fail left,Fail right) -> Fail (best left right)
| (Ok _,Fail _) -> left
| (Fail _,Ok _) -> right
| (Stop _,_) -> left
| (_,Stop _) -> right
;;
let best_parser (left:'a combinator_parser) (right:'a combinator_parser) : 'a combinator_parser =
let parse cont stack errors input =
best (left.parse cont stack errors input) (right.parse cont stack errors input)
in
{parse=parse}
;;
(** Also known as a catamorphism *)
let transform_automaton ((transform_shift
,transform_shiftreduce
,transform_reduce
,transform_found
): ((('p * (symbol*'p automaton) list) -> 'b)
*(('p automaton * 'p automaton) -> 'b)
*('p -> 'b)
*(('p * 'p automaton) -> 'b)))
(automaton:'a automaton) : 'b =
let rec transform (automaton:'a automaton) =
match automaton with
| Shift (p,choices) ->
let rec foreach choices collected =
match choices with
| [] -> collected
| ((symbol,choice)::tail) ->
let collected = (symbol,transform choice)::collected in
foreach tail collected
in
transform_shift (p,foreach choices [])
| ShiftReduce (shift,reduce) ->
transform_shiftreduce (transform shift,transform reduce)
| Reduce reduce -> transform_reduce reduce
| Found (found,more) -> transform_found (found,transform more)
in
transform automaton
;;
let map_automaton (f:'a->'b) (automaton: 'a automaton) : 'b automaton =
let transform_shift (p,choices) = Shift (f p,choices)
in
let transform_shiftreduce (shift,reduce) = ShiftReduce (shift,reduce)
in
let transform_reduce reduce = Reduce (f reduce)
in
let transform_found (found,more) = Found (f found,more)
in
transform_automaton
(transform_shift,transform_shiftreduce,transform_reduce,transform_found)
automaton
;;
let rec mkparser (automaton: string automaton) : string parser_generator =
let choose (input:input) : string combinator_parser =
let transform_shift ((p,choices)
:symbol * (symbol*symbol automaton) list) : string combinator_parser =
let table : (symbol,symbol automaton) BraunTree.braun_tree = BraunTree.tree_of_list choices in
let find key = BraunTree.find ~key ~tree:table in
let parse cont stack errors input : ('a*errors) steps =
match input with
| [] ->
let error =
errors (Inserted (p,"Insert at end of file",Notused ""))
in Stop (stack p,error)
| (h::t) -> begin
match find h with
| Some automaton ->
Ok ((mkparser automaton).generated.parse cont stack errors t)
| None ->
let errors error = errors (Deleted (h,"Deleted symbol",error))
in
let errors error = errors (Inserted (p,"Insert symbol",error))
in Fail (Fail (Stop (stack p,errors (Notused h))))
end
in
{parse=parse}
in
let transform_shiftreduce ((shift,reduce) : symbol automaton * symbol automaton ) =
let parse cont stack errors input =
(best_parser (mkparser shift).generated (mkparser reduce).generated).parse cont stack errors input
in
parse
in
let transform_reduce (reduce:symbol) =
let parse cont stack errors input = reduce cont stack errors input
in parse
in
let transform_found ((found,more):symbol*symbol automaton) =
let parse cont stack errors input = found cont stack errors input
in {parse=parse}
in
transform_automaton
(transform_shift,transform_shiftreduce,transform_reduce,transform_found)
automaton
in
let parse cont stack errors input =
(choose input).parse cont stack errors input
in
{automaton=automaton;parse=parse}
;;
(** <|> *)
let either : ('a parser_generator * 'a parser_generator) -> 'a parser_generator =
fun (p,q) ->
mkparser (merge_ch p.automaton q.automaton)
;;
let rec combine (lefts: (symbol * 'p automaton) list )
(rights: (symbol * 'p automaton) list )
: (symbol * 'p automaton) list =
match (lefts,rights) with
| ((((left_symbol,left_sentence) as left_head)::left_tail)
,(((right_symbol,right_sentence) as right_head)::right_tail)
) ->
begin
match compare left_symbol right_symbol with
| 1 -> left_head::(combine left_tail rights)
| -1 -> right_head::(combine lefts right_tail)
| 0 ->
let head = (left_symbol,either(left_sentence,right_sentence)) in
let tail = (combine left_tail right_tail) in
head::tail
| _ -> assert false
end
| ([],_) -> rights
| (_,[]) -> lefts
;;
(** <*> *)
let rec both : ('a parser_generator * 'a parser_generator) -> 'a parser_generator =
fun (p,q) ->
(** Use two combinator parsers in sequence
* a both for combinator parsers
*)
let both_combinator_parsers first second =
let parse cont stack errors input =
let stack f x = stack (f x) in
first.parse (second.parse cont) stack errors input
in
{parse=parse}
in
let transform_shift (p,choices) = Shift (both_combinator_parsers p q.generated,choices)
in
let transform_shiftreduce (shift,reduce) = merge_ch shift reduce
in
let transform_reduce reduce =
let worker x = fwby reduce x in
map_automaton worker q.automaton
in
let transform_found (found,more) = Found (both_combinator_parsers found q.generated,more)
in
let automaton = transform_automaton (transform_shift
,transform_shiftreduce
,transform_reduce
,transform_found) p.automaton
in
mkparser automaton
;;
let merge_ch left right =
match (left,right) with
| (Shift (left_parser,left_choices),Shift (right_parser,right_choices)) ->
let best = best_parser left_parser right_parser in
let choices = combine left_choices right_choices in
Shift (best,choices)
| (Shift _,ShiftReduce (shift,reduce)) ->
ShiftReduce (merge_ch left shift,reduce)
| (Shift _,Reduce _) -> ShiftReduce (left,right)
| (Shift _,Found (_,more)) ->merge_ch left more
| (Found (_,more),_) -> merge_ch more right
| (_,Shift _) -> merge_ch right left
| (ShiftReduce _,_)
| (Reduce _,_) -> raise Ambigous_grammar
;;
let symbol (a:symbol) : symbol combinator_parser =
let rec parse cont
stack
errors
input =
match input with
| x::xs ->
if a = x
then Ok (cont (stack a) errors xs)
else
let deleted_x =
let errors e = errors (Deleted (x,position xs,e)) in
parse cont stack errors xs in
let inserted_a =
let errors e = errors (Inserted (a,show_symbol a,e)) in
cont (stack x) errors input in
Fail (best deleted_x inserted_a)
| [] ->
let errors e = errors (Inserted (a,eof,e)) in
let inserted_a = cont (stack a) errors input in
Fail inserted_a
in
let accept cont stack errors input =
match input with
| [] -> assert false
| (x::xs) ->
assert (a = x) ;
Ok (cont (stack a) errors xs)
in
let shift = Shift ({parse=parse},[(a,Reduce {parse=accept})]) in
let found = Found ({parse=parse},shift) in
mkparser found
;;
let succeed f =
let parse cont stack errors input = cont (stack f) errors input in
mkparser (End {parse=parse})
;;
end
;;
######## End code
--
Sincerely | Homepage:
Jørgen | http://www.hex.no/jhf
| Public GPG key:
| http://www.hex.no/jhf/key.txt
[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 189 bytes --]
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [Caml-list] Haskell parser combinators in OCaml?
2006-10-19 22:01 Haskell parser combinators in OCaml? Jorgen Hermanrud Fjeld
@ 2006-10-20 2:43 ` Jacques Garrigue
2006-10-20 15:19 ` Tom
1 sibling, 0 replies; 3+ messages in thread
From: Jacques Garrigue @ 2006-10-20 2:43 UTC (permalink / raw)
To: jhf; +Cc: caml-list
From: Jorgen Hermanrud Fjeld <jhf@hex.no>
> From the world of Haskell, the work of S. Doaitse Swierstra in the paper
> "Combinator Parsers: From Toys to Tools"
> "http://citeseer.ist.psu.edu/363886.html", introduces some very nice
> combinator parsers that parse LALR(k) grammars, and give good error
> messages.
>
> I would love too express something equivalent in OCaml, but I'm not sure
> how to translate the concepts used into concepts in OCaml.
>
> I am hoping some of the type theorists out there would glance at the
> paper, and bestow some reflection, advice or warning upon me.
>
> There are several issues:
> 1) How to express the lazy lookahead data structure?
> 3) How to express the type of the parser in OCaml?
>
> Some details:
> 1) The lazy data structure in 4.1 can not be expressed directly,
> and I believe some kind of explicit fixed point is needed.
> Would one need fixed points with deBruijn indexes?
> Do you know of any similar examples that I may look at for
> inspiration?
I don't see why you can't. OCaml has a lazy type, so you can define all
the lazy data structures you want easily. Of course you have to define
your own type of lazy lists, and insert lots of lazy's all over the
place, but there is no real difficulty.
> 2) The parser has the haskell type
> type Parser a =
> forall b result .
> Future b result
> -> Stack a b
> -> Errs
> -> Input
> -> Steps result
> which I can not express in OCaml. My attempts at encoding this
> using an encoding that express existential types, have so far not
> worked out. I always end up with a type error, and do not see how
> to better design it.
> ######## The type error
> File "parser.ml", line 154, characters 21-26:
> This field value has type
> ('a, 'a) future ->
> (symbol, 'a) stack -> (errors -> errors) -> input -> ('a * errors) steps
> which is less general than
> 'b 'c.
> ('b, 'c) future ->
> ('d, 'b) stack -> (errors -> errors) -> input -> ('c * errors) steps
Your encoding uses universal types, not existential. But this seems
the correct thing to do, as far as I can understand the code.
The reasons for the above type error seem double:
* You annotate you local "parse" function in "mkparser" with the type
('a * errors). The trouble is that named type variables in ocaml are
not locally polymorphic. So this is ok to use them for a toplevel
function, but not for local definitions (if you want them to be
polymorphic). Just remove the annotation, or replace 'a by _ (the
anonymous type variable).
* The definition of parse itself seems wrong:
Stop(stack p, errors) will have type 'cont steps, when you want
something of type 'result steps. If you unify the two, you don't
have enough polymorphism.
The first problem is easily solved, but I don't understand enough to
correct the second one. And the rest of the code does not typecheck
anyway.
Jacques Garrigue
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [Caml-list] Haskell parser combinators in OCaml?
2006-10-19 22:01 Haskell parser combinators in OCaml? Jorgen Hermanrud Fjeld
2006-10-20 2:43 ` [Caml-list] " Jacques Garrigue
@ 2006-10-20 15:19 ` Tom
1 sibling, 0 replies; 3+ messages in thread
From: Tom @ 2006-10-20 15:19 UTC (permalink / raw)
To: caml-list
[-- Attachment #1: Type: text/plain, Size: 658 bytes --]
OCaml is able to infer types by itself, so what I suggest is that you don't
declare any local types, but only the global types (no (a:int) type
expressions, but only toplevel type 'a t = 'a -> 'a * 'a). Also, try
declaring only some of your types and define only some of your functions at
a time (using the toploop, interactive compiler) to see what are the
appropriate types the compiler infers.
Besides, you seem to have some errors in the code... for example:
let succeed f =
> let parse cont stack errors input = cont (stack f) errors input in
> mkparser (End {parse=parse})
> ;;
there is no End constructor declared.
Have fun, Tom
[-- Attachment #2: Type: text/html, Size: 938 bytes --]
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2006-10-20 15:19 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-10-19 22:01 Haskell parser combinators in OCaml? Jorgen Hermanrud Fjeld
2006-10-20 2:43 ` [Caml-list] " Jacques Garrigue
2006-10-20 15:19 ` Tom
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox