* Inlining bigarray access
@ 2006-11-20 18:49 Dmitry Bely
0 siblings, 0 replies; only message in thread
From: Dmitry Bely @ 2006-11-20 18:49 UTC (permalink / raw)
To: caml-list
Here is a small code snippet:
open Bigarray
(* One-dimensional bigarray whose elements are stored as float32, exposed to
   OCaml as [float], using the C layout. *)
type floatarray = (float, float32_elt, c_layout) Array1.t
(* Thin wrapper around [Array1.get], with the array type pinned to
   [floatarray]. *)
let get (a: floatarray) i = Array1.get a i
(* True when the elements at i-1, i and i+1 are all strictly positive.
   Calls [Array1.get] directly at each of the three positions. *)
let test3 (a:floatarray) i =
Array1.get a (i-1) > 0.0 && Array1.get a i > 0.0 && Array1.get a (i+1) > 0.0
(* Same predicate as [test3], but each element is read through the [get]
   wrapper instead of calling [Array1.get] directly. *)
let test3m (a:floatarray) i =
get a (i-1) > 0.0 && get a i > 0.0 && get a (i+1) > 0.0
My impression was that test3 and test3m should be compiled to the same
machine code (x86). Strangely, this is not the case (for simplicity,
the code is generated with a patched ocamlopt that allows -unsafe
bigarray access and 686+ float comparison instructions):
; ---------------------------------------------------------------------
; _camlBig__test3_187 -- ocamlopt output (MASM syntax, 32-bit x86) for
;   let test3 (a:floatarray) i =
;     Array1.get a (i-1) > 0.0 && Array1.get a i > 0.0
;       && Array1.get a (i+1) > 0.0
; In:   eax, ebx -- presumably `a` and the tagged integer `i` (the code
;       dereferences eax and uses ebx as the index); confirm against the
;       ocamlopt i386 calling convention.
; Out:  eax = 1 or 3 (presumably the tagged OCaml booleans false/true).
; All three element reads are inline; there is no call and no heap
; allocation anywhere in this function.
; ---------------------------------------------------------------------
.CODE
ALIGN 4
PUBLIC _camlBig__test3_187
_camlBig__test3_187:
; Reserve 8 bytes of stack; released before every ret below.
sub esp, 8
L108:
; Keep the array pointer in ecx so eax can be reused as a scratch register.
mov ecx, eax
; --- first operand: element at i-1 compared with 0.0 ---
; Push the constant 0.0 onto the x87 stack.
fldz
mov edx, ebx
; Subtract 2 then shift right by 1: yields i-1 if ebx holds 2*i+1
; (assuming OCaml's integer tagging).
add edx, -2
sar edx, 1
; Load the word at offset 4 of the array block -- presumably the data pointer.
mov eax, DWORD PTR [ecx+4]
; Load the 4-byte float element at data[index].
fld REAL4 PTR [eax+edx*4]
; Compare element (st0) with 0.0 (st1), set EFLAGS, pop the element.
fcomip st(0), st(1)
; Pop the remaining 0.0 so the x87 stack is empty again.
fstp st(0)
; Taken when element <= 0.0 or the comparison is unordered: result is false.
jbe L104
; --- second operand: element at i ---
fldz
mov edx, ebx
sar edx, 1
mov eax, DWORD PTR [ecx+4]
fld REAL4 PTR [eax+edx*4]
fcomip st(0), st(1)
fstp st(0)
jbe L105
; --- third operand: element at i+1 ---
fldz
; ebx is no longer needed afterwards, so it is adjusted in place.
add ebx, 2
sar ebx, 1
mov eax, DWORD PTR [ecx+4]
fld REAL4 PTR [eax+ebx*4]
fcomip st(0), st(1)
fstp st(0)
jbe L107
; eax = 1 when the last comparison held, 0 otherwise (set at L107).
mov eax, 1
jmp L106
L107:
xor eax, eax
L106:
; eax = 2*eax + 1: turns 0/1 into 1/3.
lea eax, DWORD PTR [eax+eax+1]
add esp, 8
ret
; Short-circuit exit when the second comparison failed: return 1.
L105:
mov eax, 1
add esp, 8
ret
; Short-circuit exit when the first comparison failed: return 1.
L104:
mov eax, 1
add esp, 8
ret
; ---------------------------------------------------------------------
; _camlBig__test3m_190 -- ocamlopt output (MASM syntax, 32-bit x86) for
;   let test3m (a:floatarray) i =
;     get a (i-1) > 0.0 && get a i > 0.0 && get a (i+1) > 0.0
; In:   eax, ebx -- presumably `a` and the tagged integer `i`; confirm
;       against the ocamlopt i386 calling convention.
; Out:  eax = 1 or 3 (presumably the tagged OCaml booleans false/true).
; Unlike _camlBig__test3_187, the first and third element reads each
; carve 12 bytes out of the area tracked by _caml_young_ptr and round-trip
; the element through that block and a stack slot before comparing.
; The second read is inline, exactly as in test3.
; ---------------------------------------------------------------------
.CODE
ALIGN 4
PUBLIC _camlBig__test3m_190
_camlBig__test3m_190:
; Reserve 8 bytes of stack, used below as a REAL8 temporary at [esp].
sub esp, 8
L113:
; Keep the array pointer in ecx; eax is reused for allocation and addressing.
mov ecx, eax
mov edx, ebx
; Tagged index minus 2, i.e. i-1 once shifted (assuming 2*i+1 tagging).
add edx, -2
; --- first operand: allocate 12 bytes by bumping _caml_young_ptr down ---
L114: mov eax, _caml_young_ptr
sub eax, 12
mov _caml_young_ptr, eax
; If the new pointer fell below _caml_young_limit, call the GC and retry.
cmp eax, _caml_young_limit
jb L115
; esi = address of the 8-byte payload; the 4 bytes before it hold the header.
lea esi, [eax+4]
; 2301 = (2 << 10) | 253 -- presumably the header of a 2-word boxed-float
; block (Double_tag); confirm against the OCaml runtime headers.
mov DWORD PTR [esi-4],2301
sar edx, 1
; Load the word at offset 4 of the array block -- presumably the data pointer.
mov eax, DWORD PTR [ecx+4]
; Load the 4-byte element and store it as an 8-byte double in the new block.
fld REAL4 PTR [eax+edx*4]
fstp REAL8 PTR [esi]
; Reload the value just stored and copy it to the stack temporary.
fld REAL8 PTR [esi]
fstp REAL8 PTR 0[esp]
; Compare the value (st0) against 0.0 (st1), then clear the x87 stack.
fldz
fld REAL8 PTR 0[esp]
fcomip st(0), st(1)
fstp st(0)
; Taken when value <= 0.0 or the comparison is unordered: result is false.
jbe L109
; --- second operand: element at i, read inline with no allocation ---
fldz
mov edx, ebx
sar edx, 1
mov eax, DWORD PTR [ecx+4]
fld REAL4 PTR [eax+edx*4]
fcomip st(0), st(1)
fstp st(0)
jbe L110
; --- third operand: element at i+1, with the same allocation pattern ---
add ebx, 2
L117: mov eax, _caml_young_ptr
sub eax, 12
mov _caml_young_ptr, eax
cmp eax, _caml_young_limit
jb L118
; edx = payload address of the second 12-byte block.
lea edx, [eax+4]
mov DWORD PTR [edx-4],2301
sar ebx, 1
mov eax, DWORD PTR [ecx+4]
fld REAL4 PTR [eax+ebx*4]
fstp REAL8 PTR [edx]
fld REAL8 PTR [edx]
fstp REAL8 PTR 0[esp]
fldz
fld REAL8 PTR 0[esp]
fcomip st(0), st(1)
fstp st(0)
jbe L112
; eax = 1 when the last comparison held, 0 otherwise (set at L112).
mov eax, 1
jmp L111
L112:
xor eax, eax
L111:
; eax = 2*eax + 1: turns 0/1 into 1/3 (same result as the lea in test3).
sal eax, 1
inc eax
add esp, 8
ret
; Short-circuit exit when the second comparison failed: return 1.
L110:
mov eax, 1
add esp, 8
ret
; Short-circuit exit when the first comparison failed: return 1.
L109:
mov eax, 1
add esp, 8
ret
; Out-of-line slow paths: call _caml_call_gc, then jump back to redo the
; corresponding allocation sequence from its start.
L118: call _caml_call_gc
L119: jmp L117
L115: call _caml_call_gc
L116: jmp L114
Could you explain why the code is different and why test3m allocates
some data on the heap? (Don't blame me for the unsafe/686 patch -
without it the assembly is still different.)
Another strange thing is that
let get: (floatarray -> int -> float) = Array1.get
is not inlined at all and compiled as a C call. Why?
- Dmitry Bely
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2006-11-20 18:49 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-11-20 18:49 Inlining bigarray access Dmitry Bely
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox