From: "Török Edwin" <edwintorok@gmail.com>
To: "Jon Harrop" <jon@ffconsultancy.com>
Cc: <caml-list@inria.fr>
Subject: Re: Value types (Was: [Caml-list] ocamlopt LLVM support)
Date: Sun, 12 Dec 2010 19:26:32 +0200 [thread overview]
Message-ID: <20101212192632.6536a647@deb0> (raw)
In-Reply-To: <038301cb9a20$13af7d10$3b0e7730$@com>
[-- Attachment #1: Type: text/plain, Size: 2265 bytes --]
On Sun, 12 Dec 2010 17:14:45 -0000
"Jon Harrop" <jon@ffconsultancy.com> wrote:
> Török Edwin wrote:
> > Problem #1: Int64.rem n 2 -> another idiv instruction
> >
> > A C compiler would optimize this to an 'and' instruction.
> > Change that to 'Int64.logand n 1L = 0L'/
>
> Yes. LLVM did that for me.
>
> > Problem #2: Int64.div n 2 -> idiv instruction.
> >
> > A C compiler would optimize this to a right shift. Changing that to
> > 'Int64.shift_right n 1' speeds
> > up the code.
>
> Yes. LLVM also did that for me. In fact, I have been bitten by
> ocamlopt not optimizing div and mod by a constant in real OCaml code
> before. This problem also turns up in the context of hash table
> implementations where you want to % by the length of the spine.
Do you really need to use Int64 for that though? Won't the 63-bit
version do?
>
> > With these changes I get almost the same speed as the C code:
> > $ ocamlopt x.ml -o x && time ./x
> > 837799
> > real 0m0.664s
> > user 0m0.667s
> > sys 0m0.000s
> >
> > $ gcc -O3 x.c && time ./a.out
> > 837799
> > real 0m0.635s
> > user 0m0.633s
> > sys 0m0.000s
> >
> > Here's the OCaml code:
> > let rec collatzLen(c, n) : int =
> > if n = 1L then c else
> > collatzLen (c+1, if Int64.logand n 1L = 0L then
> > Int64.shift_right n 1 else Int64.add (Int64.mul 3L n) 1L);;
> >
> > let rec loop(i, (nlen, n)) =
> > if i = 1L then n else
> > let ilen = collatzLen(1, i) in
> > let nlen, n = if ilen > nlen then ilen, i else nlen, n in
> > loop (Int64.sub i 1L, (nlen, n));;
> >
> > let _ =
> > let s = loop (1000000L, (1,1000000L)) in
> > print_int (Int64.to_int s);;
>
> I am unable to reproduce your results. Here, the time falls from 24s
> to 19.5s (using ocamlopt 3.12.0 on Intel x86) which is still 26×
> slower than HLVM.
Do you still have 'idiv' in the compiled code? See my attached
assembly, and compare it with yours please.
I was doing the test on 64-bit, with ocamlopt 3.11.2 and 3.12.0.
FWIW the original code took 2.8 seconds here, so only 4x slower (this
is an AMD Phenom II x6 1090T CPU). It probably depends how fast/slow
the 'idiv' is on your CPU.
--Edwin
[-- Attachment #2: x.s --]
[-- Type: application/octet-stream, Size: 4971 bytes --]
.section .rodata.cst8,"a",@progbits
.align 16
caml_negf_mask: .quad 0x8000000000000000, 0
.align 16
caml_absf_mask: .quad 0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
.data
.globl camlX__data_begin
camlX__data_begin:
.text
.globl camlX__code_begin
camlX__code_begin:
.data
.quad 2048
.globl camlX
camlX:
.space 16
.data
.quad 3319
camlX__2:
.quad caml_tuplify2
.quad -3
.quad camlX__loop_1033
.data
.quad 3319
camlX__3:
.quad caml_tuplify2
.quad -3
.quad camlX__collatzLen_1030
.data
.quad 2048
camlX__1:
.quad .L100004
.quad .L100005
.quad 2048
.L100005:
.quad 3
.quad .L100006
.quad 2303
.L100006:
.quad caml_int64_ops
.quad 1000000
.quad 2303
.L100004:
.quad caml_int64_ops
.quad 1000000
.text
.align 16
.globl camlX__collatzLen_1030
camlX__collatzLen_1030:
subq $8, %rsp
.L103:
movq %rax, %rdi
movq $1, %rsi
movq 8(%rbx), %rax
cmpq %rsi, %rax
jne .L102
movq %rdi, %rax
addq $8, %rsp
ret
.align 4
.L102:
xorq %rdx, %rdx
movq $1, %rsi
movq 8(%rbx), %rax
andq %rsi, %rax
cmpq %rdx, %rax
jne .L101
.L104: subq $24, %r15
movq caml_young_limit@GOTPCREL(%rip), %rax
cmpq (%rax), %r15
jb .L105
leaq 8(%r15), %rcx
movq $2303, -8(%rcx)
movq caml_int64_ops@GOTPCREL(%rip), %rax
movq %rax, (%rcx)
movq 8(%rbx), %rax
sarq $1, %rax
movq %rax, 8(%rcx)
jmp .L100
.align 4
.L101:
.L107: subq $24, %r15
movq caml_young_limit@GOTPCREL(%rip), %rax
cmpq (%rax), %r15
jb .L108
leaq 8(%r15), %rcx
movq $2303, -8(%rcx)
movq caml_int64_ops@GOTPCREL(%rip), %rax
movq %rax, (%rcx)
movq $1, %rdx
movq 8(%rbx), %rsi
movq $3, %rax
imulq %rsi, %rax
addq %rdx, %rax
movq %rax, 8(%rcx)
.L100:
movq %rdi, %rax
addq $2, %rax
movq %rcx, %rbx
jmp .L103
.L108: call caml_call_gc@PLT
.L109: jmp .L107
.L105: call caml_call_gc@PLT
.L106: jmp .L104
.type camlX__collatzLen_1030,@function
.size camlX__collatzLen_1030,.-camlX__collatzLen_1030
.text
.align 16
.globl camlX__loop_1033
camlX__loop_1033:
subq $24, %rsp
.L113:
movq %rax, %rdx
movq 8(%rbx), %rax
movq $1, %rsi
movq 8(%rdx), %rdi
cmpq %rsi, %rdi
jne .L112
addq $24, %rsp
ret
.align 4
.L112:
movq %rax, 8(%rsp)
movq %rdx, 16(%rsp)
movq (%rbx), %rax
movq %rax, 0(%rsp)
movq $3, %rax
movq %rdx, %rbx
call camlX__collatzLen_1030@PLT
.L114:
movq %rax, %rsi
movq 0(%rsp), %rbx
cmpq %rbx, %rsi
jle .L111
.L115: subq $24, %r15
movq caml_young_limit@GOTPCREL(%rip), %rax
cmpq (%rax), %r15
jb .L116
leaq 8(%r15), %rdi
movq $2048, -8(%rdi)
movq %rsi, (%rdi)
movq 16(%rsp), %rax
movq %rax, 8(%rdi)
jmp .L110
.align 4
.L111:
.L118: subq $24, %r15
movq caml_young_limit@GOTPCREL(%rip), %rax
cmpq (%rax), %r15
jb .L119
leaq 8(%r15), %rdi
movq $2048, -8(%rdi)
movq %rbx, (%rdi)
movq 8(%rsp), %rax
movq %rax, 8(%rdi)
.L110:
.L121: subq $48, %r15
movq caml_young_limit@GOTPCREL(%rip), %rax
cmpq (%rax), %r15
jb .L122
leaq 8(%r15), %rbx
movq $2048, -8(%rbx)
movq (%rdi), %rax
movq %rax, (%rbx)
movq 8(%rdi), %rax
movq %rax, 8(%rbx)
leaq 24(%rbx), %rax
movq $2303, -8(%rax)
movq caml_int64_ops@GOTPCREL(%rip), %rdi
movq %rdi, (%rax)
movq $1, %rsi
movq 16(%rsp), %rdi
movq 8(%rdi), %rdi
subq %rsi, %rdi
movq %rdi, 8(%rax)
jmp .L113
.L122: call caml_call_gc@PLT
.L123: jmp .L121
.L119: call caml_call_gc@PLT
.L120: jmp .L118
.L116: call caml_call_gc@PLT
.L117: jmp .L115
.type camlX__loop_1033,@function
.size camlX__loop_1033,.-camlX__loop_1033
.text
.align 16
.globl camlX__entry
camlX__entry:
subq $8, %rsp
.L124:
movq camlX__3@GOTPCREL(%rip), %rbx
movq camlX@GOTPCREL(%rip), %rax
movq %rbx, (%rax)
movq camlX__2@GOTPCREL(%rip), %rbx
movq camlX@GOTPCREL(%rip), %rax
movq %rbx, 8(%rax)
movq camlX@GOTPCREL(%rip), %rax
movq 8(%rax), %rbx
movq camlX__1@GOTPCREL(%rip), %rax
movq (%rbx), %rdi
call *%rdi
.L125:
movq 8(%rax), %rax
salq $1, %rax
orq $1, %rax
call camlPervasives__string_of_int_1130@PLT
.L126:
movq %rax, %rbx
movq camlPervasives@GOTPCREL(%rip), %rax
movq 184(%rax), %rax
call camlPervasives__output_string_1191@PLT
.L127:
movq $1, %rax
addq $8, %rsp
ret
.type camlX__entry,@function
.size camlX__entry,.-camlX__entry
.data
.text
.globl camlX__code_end
camlX__code_end:
.data
.globl camlX__data_end
camlX__data_end:
.long 0
.globl camlX__frametable
camlX__frametable:
.quad 9
.quad .L127
.word 17
.word 0
.align 8
.long (.L200000 - .) + 0xe0000000
.long 0x168120
.quad .L126
.word 17
.word 0
.align 8
.long (.L200000 - .) + 0xe0000000
.long 0x168270
.quad .L125
.word 16
.word 0
.align 8
.quad .L123
.word 32
.word 2
.word 16
.word 5
.align 8
.quad .L120
.word 32
.word 3
.word 3
.word 8
.word 16
.align 8
.quad .L117
.word 32
.word 2
.word 16
.word 7
.align 8
.quad .L114
.word 32
.word 3
.word 0
.word 8
.word 16
.align 8
.quad .L109
.word 16
.word 2
.word 3
.word 5
.align 8
.quad .L106
.word 16
.word 2
.word 3
.word 5
.align 8
.L200000:
.asciz "pervasives.ml"
.align 8
.section .note.GNU-stack,"",%progbits
next prev parent reply other threads:[~2010-12-12 17:26 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-12-12 14:54 Jon Harrop
2010-12-12 15:55 ` Török Edwin
2010-12-12 17:14 ` Jon Harrop
2010-12-12 17:26 ` Török Edwin [this message]
2010-12-12 18:01 ` Jon Harrop
2010-12-12 18:22 ` Török Edwin
2010-12-12 19:09 ` Benedikt Meurer
2010-12-12 19:20 ` John Carr
2010-12-14 9:43 ` Value types Goswin von Brederlow
2010-12-12 19:55 ` Value types (Was: [Caml-list] ocamlopt LLVM support) Török Edwin
2010-12-12 22:05 ` Jon Harrop
2010-12-12 22:27 ` Török Edwin
2010-12-12 23:41 ` Jon Harrop
2010-12-13 2:13 ` Eray Ozkural
2010-12-12 21:50 ` Jon Harrop
2010-12-13 8:43 ` Alain Frisch
2010-12-15 10:29 ` Benedikt Meurer
2010-12-15 13:15 ` Jon Harrop
2010-12-14 9:54 ` Value types Goswin von Brederlow
2010-12-12 19:53 ` Value types (Was: [Caml-list] ocamlopt LLVM support) Brian Hurt
2010-12-12 20:39 ` Jon Harrop
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20101212192632.6536a647@deb0 \
--to=edwintorok@gmail.com \
--cc=caml-list@inria.fr \
--cc=jon@ffconsultancy.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox