From: Martin Jambon <martin.jambon@ens-lyon.org>
To: Andrej Bauer <andrej.bauer@andrej.com>
Cc: caml-list@inria.fr
Subject: Re: [Caml-list] ocamllex and python-style indentation
Date: Fri, 12 Jun 2009 14:56:21 +0200 [thread overview]
Message-ID: <4A325075.7040909@ens-lyon.org> (raw)
In-Reply-To: <7d8707de0906120120x10cc8fe0p54adbd189003f3da@mail.gmail.com>
Andrej Bauer wrote:
> Thanks to Andreas, I'll have a look at the "old" code.
>
> I think I understand the general idea of inserting "virtual" tokens,
> but the details confuse me still. So starting with
>
>> if True:
>> x = 3
>> y = (2 +
>> 4 + 5)
>> else:
>> x = 5
>> if False:
>> x = 8
>> z = 2
>
> Martin suggests the following:
>
>> {
>> if True:
>> ;
>> {
>> x = 3
>> ;
>> y = (2 +
>> ;
>> {
>> 4 + 5)
>> }
>> }
>> ;
>> else:
>> ;
>> {
>> x = 5
>> ;
>> if False:
>> ;
>> {
>> x = 8
>> ;
>> z = 2
>> }
>> }
>> }
>
> I have two questions. Notice that the { ... } and ( ... ) need not be
> correctly nested (in the top half), so how are we going to deal with
> this? The second question is, why are there the separators after and
> just before "else:". I would expect separators inside { .... }, but
> not around "else".
Original example:
if True:
x = 3
y = (2 +
4 + 5)
else:
x = 5
if False:
x = 8
z = 2
For pure indentation concerns, it is equivalent to:
x
x
x
x
x
x
x
x
x
Which is parsed into:
[
Line;
Block
[
Line;
Line;
Block
[
Line
]
];
Line;
Block
[
Line;
Line;
Block
[
Line;
Line
]
]
]
I wrote the following code, which does the job. You might want to use
ocamllex instead in order to better manage newline characters (CRLF...) and
line number directives, and to allow input from something other than a file
or in_channel.
Note that the following must be rejected:
x
    x
  x (indentation here could be only 0, 4 or more)
But this is accepted:
x
x
x
x
You could also enforce that the indentation of a block must be the current
indentation + k, for example k=2 for the whole input.
(******************* indent_parser.ml **********************)
(** One non-blank input line: its source position, paired with
    [(indent, text)] where [indent] is the number of leading spaces and
    [text] is the line with those spaces removed. *)
type indent_line = Lexing.position * (int * string)

(** Indentation tree.
    - [`Line (pos, text)] is a single line.
    - [`Block (pos, items)] is a group of more deeply indented items;
      [pos] is the position of the line that opened the block. *)
type indent_tree =
  [ `Line of Lexing.position * string
  | `Block of Lexing.position * indent_tree list
  ]
(** [split s] separates the leading spaces of [s] from the rest.

    Returns [Some (i, rest)] where [i] is the index of the first character
    that is not a space [' '] and [rest] is the substring starting there.
    Returns [None] when [s] is empty or consists only of spaces.
    Only the space character counts as indentation; a tab is treated like
    any other non-space character. *)
let split s =
  let len = String.length s in
  let rec first_non_space i =
    if i >= len then None
    else if s.[i] = ' ' then first_non_space (i + 1)
    else Some (i, String.sub s i (len - i))
  in
  first_non_space 0
(** [parse_lines fname ic] reads [ic] line by line until end of file and
    returns the non-blank lines in input order.

    Each returned line carries a [Lexing.position] in which:
    - [pos_fname] is [fname];
    - [pos_lnum] is the 1-based line number (blank lines are counted too);
    - [pos_bol] is the channel offset, as given by [pos_in], at which the
      line starts;
    - [pos_cnum] is [pos_bol] plus the number of leading spaces.

    Lines that are empty or contain only spaces are dropped. *)
let parse_lines fname ic : indent_line list =
  let rec loop lnum acc =
    (* Offset must be sampled before the line is consumed. *)
    let bol = pos_in ic in
    match input_line ic with
    | exception End_of_file -> List.rev acc
    | s -> (
        let lnum = lnum + 1 in
        match split s with
        | None -> loop lnum acc
        | Some ((n, _) as x) ->
            let pos =
              {
                Lexing.pos_fname = fname;
                pos_lnum = lnum;
                pos_bol = bol;
                pos_cnum = bol + n;
              }
            in
            loop lnum ((pos, x) :: acc))
  in
  loop 0 []
(** [parse_lines_from_file fname] opens [fname], runs {!parse_lines} on it
    and closes the channel again.

    If anything raises after the file has been opened, the channel is
    closed with [close_in_noerr] and the exception is re-raised. *)
let parse_lines_from_file fname =
  let ic = open_in fname in
  try
    let parsed = parse_lines fname ic in
    close_in ic;
    parsed
  with exn ->
    (* Best-effort close so the original exception is what propagates. *)
    close_in_noerr ic;
    raise exn
(** [error pos msg] raises [Failure] with [msg] prefixed by a location
    header of the form [File "<name>", line <n>, characters 0-<c>:] where
    [<c>] is the column offset [pos_cnum - pos_bol] of [pos].

    @raise Failure always. *)
let error pos msg =
  let { Lexing.pos_fname; pos_lnum; pos_bol; pos_cnum } = pos in
  failwith
    (Printf.sprintf "File %S, line %i, characters %i-%i:\n%s"
       pos_fname pos_lnum 0 (pos_cnum - pos_bol) msg)
(** [block_body cur_indent sub_indent cur_block l] collects the items of the
    block whose lines are indented by exactly [cur_indent] spaces.

    - [cur_indent]: indentation of the block being built.
    - [sub_indent]: indentation of the most recent sub-block opened directly
      inside this block, or [None] if none has been opened yet.
    - [cur_block]: items gathered so far, most recent first.
    - [l]: remaining input lines.

    Returns [(items, rest)]: the block's items in source order and the lines
    that were not consumed. The function stops at the first line indented
    less than [cur_indent], which is left at the head of [rest].

    A line indented more than [cur_indent] opens a sub-block at that line's
    indentation. If a previous sub-block of this block used a different
    indentation, {!error} is called with ["Inconsistent indentation"]. *)
let rec block_body cur_indent sub_indent cur_block l :
    indent_tree list * indent_line list =
  match l with
  | [] -> (List.rev cur_block, [])
  | (pos, (indent, text)) :: rest ->
      if indent < cur_indent then
        (* Dedent: this line belongs to an enclosing block. *)
        (List.rev cur_block, l)
      else if indent = cur_indent then
        block_body cur_indent sub_indent (`Line (pos, text) :: cur_block) rest
      else begin
        (* Deeper line: every sub-block of one block must share the same
           indentation. *)
        (match sub_indent with
         | Some expected ->
             if expected <> indent then error pos "Inconsistent indentation"
         | None -> ());
        let nested, after =
          block_body indent None [ `Line (pos, text) ] rest
        in
        block_body cur_indent (Some indent)
          (`Block (pos, nested) :: cur_block)
          after
      end
(** [parse_indentation fname] reads the file [fname] and returns its
    indentation tree, treating indentation 0 as the top-level block.

    The top-level call to {!block_body} is expected to consume every line;
    this is checked with an assertion. *)
let parse_indentation fname =
  let tree, leftover =
    parse_lines_from_file fname |> block_body 0 None []
  in
  assert (leftover = []);
  tree
(** [test ()] writes a small Python-like sample to a temporary file, parses
    it with {!parse_indentation} and returns the resulting tree.

    The temporary file is removed whether parsing succeeds or raises.
    A [Failure] message is printed on stderr before being re-raised; any
    other exception is re-raised as is. *)
let test () =
  let fname = Filename.temp_file "test" ".ind" in
  let oc = open_out fname in
  (* The sample must keep its leading spaces: with every line at column 0
     the parser would only ever produce a flat list of [`Line]s and the
     nesting logic would not be exercised. *)
  output_string oc "
if True:
  x = 3
  y = (2 +
    4 + 5)
else:
  x = 5
  if False:
    x = 8
    z = 2
";
  close_out oc;
  try
    let result = parse_indentation fname in
    Sys.remove fname;
    result
  with e ->
    (match e with
     | Failure msg -> Printf.eprintf "%s\n%!" msg
     | _ -> ());
    (* Best-effort removal on every error path, so the temp file is not
       leaked and the original exception is not masked. *)
    (try Sys.remove fname with Sys_error _ -> ());
    raise e
(*****************************************************************)
> Presumably the intermediate stage that I would preprocess the token
> stream would have to know about indentation levels. I have not tried
> this, but ocaml lexer will correctly match things like
>
> | '\n' [' ' '\t']* -> { INDENTATION (compute_indentation (lexeme buf)) }
>
> Yes?
Kind of. Don't discard the rest of the line...
If you have a choice, reject tabs.
Beware of CRLF newlines (\r\n) and missing \n before the end of file.
Also ocamllex does not keep track of newlines automatically. See the
documentation for Lexing.lexbuf.
Martin
--
http://mjambon.com/
next prev parent reply other threads:[~2009-06-12 13:01 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-06-11 12:57 Andrej Bauer
2009-06-11 13:12 ` [Caml-list] " yoann padioleau
2009-06-11 13:21 ` Andreas Rossberg
2009-06-11 13:44 ` Martin Jambon
2009-06-12 8:20 ` Andrej Bauer
2009-06-12 12:56 ` Martin Jambon [this message]
2009-06-12 13:34 ` Martin Jambon
2009-06-12 15:43 ` Andreas Rossberg
2009-06-30 18:58 ` Yitzhak Mandelbaum
2009-06-30 20:19 ` Mike Lin
2009-06-30 22:06 ` Andreas Rossberg
2009-07-01 2:13 ` Mike Lin
2009-07-01 7:31 ` Andreas Rossberg
2009-07-01 14:02 ` Mike Lin
2009-07-01 14:17 ` Andreas Rossberg
2009-07-01 14:21 ` Andreas Rossberg
2009-07-01 14:37 ` Mike Lin
2009-07-01 15:03 ` Sylvain Le Gall
2009-07-01 15:16 ` [Caml-list] " Andreas Rossberg
2009-07-01 16:26 ` Sylvain Le Gall
2009-07-01 15:19 ` [Caml-list] " Martin Jambon
2009-07-01 15:43 ` Andreas Rossberg
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4A325075.7040909@ens-lyon.org \
--to=martin.jambon@ens-lyon.org \
--cc=andrej.bauer@andrej.com \
--cc=caml-list@inria.fr \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox